## Load libraries
library(covid19)
library(ggplot2)
library(lubridate)
library(dplyr)
library(ggplot2)
library(sp)
library(raster)
library(viridis)
library(ggthemes)
library(sf)
library(rnaturalearth)
library(rnaturalearthdata)
library(RColorBrewer)
library(readr)
library(zoo)
library(tidyr)
options(scipen = '999')

Daily cases Spain

pd <- df_country %>%
  filter(country == 'Spain')

ggplot(data = pd,
       aes(x = date,
           y = cases_non_cum)) +
  geom_bar(stat = 'identity') +
  theme_simple() +
  labs(x = 'Fecha',
       y = 'Casos diarios',
       title = 'Casos confirmados nuevos')

Daily deahts in Spain

pd <- df_country %>%
  filter(country == 'Spain')

ggplot(data = pd,
       aes(x = date,
           y = deaths_non_cum)) +
  geom_bar(stat = 'identity') +
  theme_simple() +
  labs(x = 'Fecha',
       y = 'Muertes diarias',
       title = 'Muertes')

Daily cases world / population-adjusted

covid19::plot_day_zero(countries = c('Italy', 'Spain', 'US', 'Germany',
                                     'Canada', 'UK', 'Netherlands'
                                     ),
                       ylog = F,
                       day0 = 1,
                       cumulative = F,
                       calendar = T,
                       pop = T,
                       point_alpha = 0,
                       color_var = 'geo')

Cases per pop last week

pd <- df_country %>%
  left_join(world_pop %>% dplyr::select(iso, pop)) %>%
  group_by(country) %>%
  mutate(max_date = max(date)) %>%
  mutate(week_ago = max_date - 6) %>%
  # filter(date == max(date)) %>%
  filter(date >= week_ago, date <= max_date) %>%
  group_by(country) %>%
  summarise(y = sum(cases_non_cum),
            pop = dplyr::first(pop),
            date_range = paste0(min(date), ' - ', max(date)),
            yp = sum(cases_non_cum) / dplyr::first(pop) * 1000000) %>%
  ungroup %>%
  filter(pop > 1000000) %>%
  arrange(desc(yp)) %>%
  head(15) %>%
  mutate(country = ifelse(country == 'United Kingdom', 'UK', country))
pd$country <- factor(pd$country, levels = unique(pd$country))

ggplot(data = pd,
       aes(x = country,
           y = yp)) +
  geom_bar(stat = 'identity') +
  theme_simple() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 12)) +
  geom_text(aes(label = round(yp, digits = 0)),
            nudge_y = -50,
            color = 'white') +
  labs(x = '',
       y = 'Cases per 1,000,000 (last 7 days)',
       title = 'New confirmed COVID-19 cases per million population, last 7 days')

Deaths per pop last week

pd <- df_country %>%
  left_join(world_pop %>% dplyr::select(iso, pop)) %>%
  group_by(country) %>%
  mutate(max_date = max(date)) %>%
  mutate(week_ago = max_date - 6) %>%
  # filter(date == max(date)) %>%
  filter(date >= week_ago, date <= max_date) %>%
  group_by(country) %>%
  summarise(y = sum(deaths_non_cum),
            pop = dplyr::first(pop),
            date_range = paste0(min(date), ' - ', max(date)),
            yp = sum(deaths_non_cum) / dplyr::first(pop) * 1000000) %>%
  ungroup %>%
  filter(pop > 1000000) %>%
  arrange(desc(yp)) %>%
  head(10) %>%
  mutate(country = ifelse(country == 'United Kingdom', 'UK', country))
pd$country <- factor(pd$country, levels = unique(pd$country))

ggplot(data = pd,
       aes(x = country,
           y = yp)) +
  geom_bar(stat = 'identity') +
  theme_simple() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 12)) +
  geom_text(aes(label = round(yp, digits = 0)),
            nudge_y = -10,
            color = 'white') +
  labs(x = '',
       y = 'Deaths per 1,000,000 (last 7 days)',
       title = 'New confirmed COVID-19 deaths per million population, last 7 days')

Lombardia, Catalonia, Madrid

New cases in last week

pd <- esp_df %>%
  left_join(esp_pop %>% dplyr::select(ccaa, pop)) %>%
  mutate(country = 'Spain') %>%
  bind_rows(
    ita %>% left_join(ita_pop %>% dplyr::select(ccaa, pop)) %>% mutate(country = 'Italy')
  ) %>%
  bind_rows(
    df %>% filter(country == 'US') %>% mutate(ccaa = district) %>% left_join(regions_pop %>% dplyr::select(ccaa, pop)) %>% mutate(country = 'US')) %>%
  group_by(ccaa) %>%
  mutate(max_date = max(date)) %>%
  mutate(week_ago = max_date - 6) %>%
  # filter(date == max(date)) %>%
  filter(date >= week_ago, date <= max_date) %>%
  group_by(ccaa) %>%
  summarise(y = sum(cases_non_cum),
            country = dplyr::first(country),
            pop = dplyr::first(pop),
            date_range = paste0(min(date), ' - ', max(date))) %>%
  ungroup %>%
  mutate(yp = y / pop * 1000000) %>%
  ungroup %>%
  # filter(pop > 1000000) %>%
  arrange(desc(yp)) 

#Get country totals
pd %>%
  group_by(country) %>%
  summarise(y = sum(y, na.rm = T),
            pop = sum(pop, na.rm = T)) %>%
  ungroup %>%
  mutate(yp = y / pop * 1000000)
# A tibble: 3 x 4
  country      y       pop    yp
  <chr>    <dbl>     <dbl> <dbl>
1 Italy    27415  60491453  453.
2 Spain    34464  47026208  733.
3 US      218254 328239523  665.
library(knitr)
library(kableExtra)
pd <- pd %>%
  mutate(yp = round(yp, digits = 1)) %>%
  mutate(Rank = 1:nrow(pd)) %>%
  dplyr::select(Rank, 
                Región = ccaa,
                `Casos nuevos, 7 días` = y,
                Población = pop,
                `Casos nuevos 7 días por millón` = yp)
kable(pd) %>%
  kable_styling("striped", full_width = F) %>%
  column_spec(which(names(pd) == 'Casos nuevos 7 días por millón'), bold = T) %>%
  row_spec(which(pd$`Región` %in% esp_df$ccaa), bold = T, color = "white", background = "#D7261E")
Rank Región Casos nuevos, 7 días Población Casos nuevos 7 días por millón
1 New York 65873 19453561 3386.2
2 New Jersey 24345 8882190 2740.9
3 La Rioja 639 316798 2017.1
4 Massachusetts 12975 6892503 1882.5
5 Connecticut 6360 3565287 1783.9
6 CLM 3452 2032863 1698.1
7 Rhode Island 1743 1059361 1645.3
8 Louisiana 7585 4648794 1631.6
9 CyL 3512 2399548 1463.6
10 Navarra 861 654214 1316.1
11 Madrid 8423 6663394 1264.1
12 District of Columbia 873 705749 1237.0
13 Trentino-Alto Adige 1222 1070000 1142.1
14 Valle d’Aosta 139 126202 1101.4
15 Cataluña 7902 7675217 1029.5
16 País Vasco 2208 2207776 1000.1
17 Piemonte 4298 4376000 982.2
18 Delaware 952 973764 977.6
19 Pennsylvania 11349 12801989 886.5
20 Lombardia 8597 10040000 856.3
21 Michigan 8526 9986857 853.7
22 Maryland 4608 6045680 762.2
23 Illinois 9593 12671821 757.0
24 Emilia-Romagna 3009 4453000 675.7
25 Liguria 1045 1557000 671.2
26 Aragón 840 1319291 636.7
27 Veneto 2851 4905000 581.2
28 South Dakota 490 884659 553.9
29 Extremadura 590 1067710 552.6
30 Marche 839 1532000 547.7
31 Georgia 5805 10617423 546.7
32 Indiana 3517 6732219 522.4
33 Cantabria 294 581078 506.0
34 Galicia 1343 2699499 497.5
35 Colorado 2357 5758736 409.3
36 Mississippi 1143 2976149 384.1
37 Toscana 1388 3737000 371.4
38 Alabama 1798 4903185 366.7
39 Washington 2784 7614893 365.6
40 Florida 7545 21477737 351.3
41 Abruzzo 457 1315000 347.5
42 C. Valenciana 1726 5003769 344.9
43 Vermont 215 623989 344.6
44 Nevada 981 3080156 318.5
45 Friuli Venezia Giulia 383 1216000 315.0
46 Missouri 1922 6137428 313.2
47 Virginia 2634 8535519 308.6
48 Asturias 312 1022800 305.0
49 Tennessee 1875 6829174 274.6
50 New Mexico 575 2096829 274.2
51 South Carolina 1271 5148714 246.9
52 Iowa 718 3155070 227.6
53 New Hampshire 308 1359711 226.5
54 Kentucky 1008 4467673 225.6
55 Texas 6468 28995881 223.1
56 Nebraska 427 1934408 220.7
57 Ohio 2561 11689100 219.1
58 Utah 695 3205958 216.8
59 Kansas 593 2913314 203.5
60 Baleares 230 1149460 200.1
61 California 7761 39512223 196.4
62 Andalucía 1606 8414240 190.9
63 Idaho 329 1787065 184.1
64 North Carolina 1921 10488084 183.2
65 Oklahoma 716 3956971 180.9
66 Wisconsin 1021 5822434 175.4
67 Melilla 15 86487 173.4
68 Puglia 672 4048000 166.0
69 Lazio 965 5897000 163.6
70 West Virginia 269 1792147 150.1
71 Arkansas 443 3017804 146.8
72 Arizona 1056 7278717 145.1
73 Ceuta 12 84777 141.5
74 Canarias 295 2153389 137.0
75 Murcia 204 1493898 136.6
76 North Dakota 101 762062 132.5
77 Wyoming 73 578759 126.1
78 Sardegna 206 1648000 125.0
79 Minnesota 686 5639632 121.6
80 Maine 163 1344212 121.3
81 Alaska 87 731545 118.9
82 Campania 644 5827000 110.5
83 Oregon 459 4217737 108.8
84 Molise 33 308493 107.0
85 Montana 101 1068778 94.5
86 Umbria 80 884640 90.4
87 Hawaii 128 1415872 90.4
88 Sicilia 422 5027000 83.9
89 Calabria 128 1957000 65.4
90 Basilicata 37 567118 65.2
91 American Samoa 0 NA NA
92 Diamond Princess 0 NA NA
93 Grand Princess 0 NA NA
94 Guam 21 NA NA
95 Northern Mariana Islands 3 NA NA
96 Puerto Rico 422 NA NA
97 Recovered 0 NA NA
98 United States Virgin Islands 6 NA NA
99 US 1 NA NA
100 Virgin Islands 9 NA NA
101 Wuhan Evacuee 4 NA NA
102 NA 2 NA NA

Just Italy and Spain

xpd = pd %>% filter(`Región` %in% c(ita$ccaa, esp_df$ccaa))
xpd$Rank <- 1:nrow(xpd)

kable(xpd) %>%
  kable_styling("striped", full_width = F) %>%
  column_spec(which(names(xpd) == 'Casos nuevos 7 días por millón'), bold = T) %>%
  row_spec(which(xpd$`Región` %in% esp_df$ccaa), bold = T, color = "white", background = "#D7261E")
Rank Región Casos nuevos, 7 días Población Casos nuevos 7 días por millón
1 La Rioja 639 316798 2017.1
2 CLM 3452 2032863 1698.1
3 CyL 3512 2399548 1463.6
4 Navarra 861 654214 1316.1
5 Madrid 8423 6663394 1264.1
6 Trentino-Alto Adige 1222 1070000 1142.1
7 Valle d’Aosta 139 126202 1101.4
8 Cataluña 7902 7675217 1029.5
9 País Vasco 2208 2207776 1000.1
10 Piemonte 4298 4376000 982.2
11 Lombardia 8597 10040000 856.3
12 Emilia-Romagna 3009 4453000 675.7
13 Liguria 1045 1557000 671.2
14 Aragón 840 1319291 636.7
15 Veneto 2851 4905000 581.2
16 Extremadura 590 1067710 552.6
17 Marche 839 1532000 547.7
18 Cantabria 294 581078 506.0
19 Galicia 1343 2699499 497.5
20 Toscana 1388 3737000 371.4
21 Abruzzo 457 1315000 347.5
22 C. Valenciana 1726 5003769 344.9
23 Friuli Venezia Giulia 383 1216000 315.0
24 Asturias 312 1022800 305.0
25 Baleares 230 1149460 200.1
26 Andalucía 1606 8414240 190.9
27 Melilla 15 86487 173.4
28 Puglia 672 4048000 166.0
29 Lazio 965 5897000 163.6
30 Ceuta 12 84777 141.5
31 Canarias 295 2153389 137.0
32 Murcia 204 1493898 136.6
33 Sardegna 206 1648000 125.0
34 Campania 644 5827000 110.5
35 Molise 33 308493 107.0
36 Umbria 80 884640 90.4
37 Sicilia 422 5027000 83.9
38 Calabria 128 1957000 65.4
39 Basilicata 37 567118 65.2
x = esp_df %>% left_join(esp_pop) %>%
  bind_rows(ita %>% left_join(ita_pop)) %>%
  filter(date == '2020-04-09') %>%
  filter(ccaa %in% c('Madrid',
                     'Cataluña', 'Lombardia')) %>%
  dplyr::select(ccaa, deaths, cases, pop) %>%
  mutate(deathsp = deaths / pop * 100000,
         casesp = cases / pop * 100000)


covid19::plot_day_zero(countries = c('Italy', 'Spain', 'China', 'South Korea', 'Sinagpore'),
                       districts = c('Madrid', #'Hubei',
                     'Cataluña', 'Lombardia'),
                     by_district = T,
                     roll = 7,
                     deaths = F,
                     pop = T,
                     day0 = 0,
                     ylog = F,
                     calendar = T,
                     cumulative = F) +
  labs(x = 'Data',
       y = 'Casos diaris (mitjana mòbil de 7 dies)',
       title = 'Casos diaris per 1.000.000',
       subtitle = 'Mitjana mòbil de 7 dies') 

covid19::plot_day_zero(countries = c('Italy', 'Spain'),
                     roll = 7,
                     deaths = F,
                     pop = T,
                     day0 = 0,
                     ylog = F,
                     calendar = T,
                     cumulative = F) +
  labs(x = 'Data',
       y = 'Casos diaris per 1.000.000 (mitjana mòbil de 7 dies)',
       title = 'Casos diaris per 1.000.000',
       subtitle = 'Mitjana mòbil de 7 dies') +
  theme(legend.direction = 'horizontal',
        legend.position = 'top')

covid19::plot_day_zero(countries = c('Italy', 'Spain'),
                     roll = 7,
                     deaths = T,
                     pop = T,
                     day0 = 0,
                     ylog = F,
                     calendar = T,
                     cumulative = F) +
  labs(x = 'Data',
       y = 'Morts diaris per 1.000.000 (mitjana mòbil de 7 dies)',
       title = 'Morts diaris per 1.000.000',
       subtitle = 'Mitjana mòbil de 7 dies') +
  theme(legend.direction = 'horizontal',
        legend.position = 'top')

covid19::plot_day_zero(countries = c('Italy', 'Spain'),
                       by_district = T,
                       districts = c('Madrid', 'Emilia-Romagna',
                     'Cataluña', 'Lombardia'),
                     roll = 7,
                     deaths = F,
                     pop = T,
                     day0 = 0,
                     ylog = F,
                     calendar = T,
                     cumulative = F) +
  labs(x = 'Data',
       y = 'Casos diaris per 1.000.000 (mitjana mòbil de 7 dies)',
       title = 'Casos diaris per 1.000.000',
       subtitle = 'Mitjana mòbil de 7 dies') +
  theme(legend.direction = 'horizontal',
        legend.position = 'top')

Personas susceptibles

left <- esp_df %>%
  group_by(date) %>%
  summarise(cases = sum(cases))
left$pop <- esp_pop %>% summarise(pop = sum(pop)) %>% .$pop
left$susceptible <- left$pop - (left$cases * 10)
left$p <- left$susceptible / left$pop * 100

ggplot(data = left,
       aes(x = date,
           y = p)) +
  geom_line() +
  ylim(0, 100)

Asia

covid19::plot_day_zero(countries = c('Japan', 'South Korea', 'Singapore', 'Hong Kong'),
                     roll = 7,
                     deaths = F,
                     # pop = T,
                     day0 = 0,
                     ylog = F,
                     calendar = T,
                     cumulative = F) +
    labs(x = 'Data',
       y = 'Casos diaris (mitjana mòbil de 3 dies)',
       title = 'Casos diaris',
       subtitle = 'Mitjana mòbil de 3 dies') +
  facet_wrap(~geo, scales = 'free_y') +
  theme(legend.position = 'none')

Day of week analysis

pd <- esp_df %>%
  arrange(date) %>%
  group_by(date) %>%
  summarise(deaths_non_cum = sum(deaths_non_cum),
            cases_non_cum = sum(cases_non_cum)) %>%
  ungroup %>%  
  mutate(dow = weekdays(date)) %>%
  mutate(week = isoweek(date)) %>%
  group_by(week) %>%
  mutate(start_date = min(date)) %>%
  ungroup %>%
  filter(date >= '2020-03-09')
pd$dow <- factor(pd$dow,
                 levels = c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
                            'Saturday', 'Sunday'),
                 labels = c('Lunes', 'Martes', 'Miércoles', 'Jueves', 'Viernes',
                            'Sábado', 'Domingo'))
n_cols <- length(unique(pd$start_date))
cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 9, name = 'Spectral'))(n_cols)
pd$start_date <- factor(pd$start_date)
ggplot(data = pd,
       aes(x = dow,
           y = cases_non_cum,
           group = week,
           color = start_date)) +
  geom_line(size = 4) +
  geom_point(size = 4) +
  scale_color_manual(name = 'Primer día\nde la semana',
                     values = cols) +
  theme_simple() +
  labs(x = 'Día de la semana',
       y = 'Muertes')

Basque country vs rest of Spain

pd <- esp_df %>%
  mutate(geo = ifelse(ccaa == 'País Vasco', 'Basque country', 'Rest of Spain')) %>%
  group_by(geo, date) %>%
  summarise(deaths = sum(deaths)) %>%
  ungroup
pp <- esp_pop %>%
    mutate(geo = ifelse(ccaa == 'País Vasco', 'Basque country', 'Rest of Spain')) %>%
  group_by(geo) %>%
  summarise(pop = sum(pop))
pd <- left_join(pd, pp) %>% mutate(pk = deaths / pop * 100000)

ggplot(data = pd %>% filter(pk > 0.1),
       aes(x = date,
           y = pk,
           color = geo)) +
  geom_line() +
  labs(x = 'Date',
       y = 'Cumulative deaths per 100,000') +
  scale_color_manual(name = '',
                     values = c('red', 'black')) +
  theme_simple() 

Country trajectories, population adjusted

countries <- c(
  'Spain',
  'US',
  'France',
  # 'Portugal',
  'Italy',
  'China'
)
districts <- c('Lombardia', 'Cataluña', 
               'New York', 
               # 'Hubei',
               'CyL', 
               'CLM', 
               # 'Washington',
               'La Rioja',
               'Madrid')

plot_day_zero(countries = countries,
              districts = districts,
              ylog = F,
              day0 = 1,
              cumulative = F,
              time_before = 0,
              roll = 3,
              deaths = T,
              pop = T,
              by_district = T,
              point_alpha = 0,
              line_size = 3,
              color_var = 'geo')

Italy and Spain

dir.create('/tmp/totesmou')
plot_day_zero(countries = c('Spain', 'Italy', max_date = Sys.Date()-1),
              point_size = 2, calendar = T)

ggsave('/tmp/totesmou/1_italy_vs_spain.png',
       height = 5.6,
       width = 9.6)
plot_day_zero(countries = c('Spain', 'Italy'),
              point_size = 2, calendar = F)

ggsave('/tmp/totesmou/2_italy_vs_spain_temps_ajustat.png',
       height = 5.6,
       width = 9.6)
plot_day_zero(countries = c('Spain', 'Italy'),
              point_size = 2, calendar = T, deaths = T, day0 = 10)

plot_day_zero(countries = c('Spain', 'Italy'),
              point_size = 2, calendar = F, deaths = T, day0 = 10)

plot_day_zero(countries = c('Spain', 'Italy'),
              point_size = 2, calendar = F, deaths = T, day0 = 10, pop = T)

plot_day_zero(countries = c('Spain', 'Italy'),
              point_size = 2, calendar = F, deaths = T, day0 = 1, pop = T, roll = 3, roll_fun = 'mean')

plot_day_zero(countries = c('Spain', 'Italy'),
              districts = c('Cataluña', 'Madrid',
                            'Lombardia', 'Emilia-Romagna'),
              by_district = T,
              day0 = 10,
              pop = T)

plot_day_zero(countries = c('Spain', 'Italy'),
              districts = c('Cataluña', 'Madrid',
                            'Lombardia', 'Emilia-Romagna'),
              by_district = T,
              deaths = T,
              day0 = 1,
              pop = T)

plot_day_zero(countries = c('Spain', 'Italy'),
              districts = c('Cataluña', 'Madrid',
                            'Lombardia', 'Emilia-Romagna'),
              by_district = T,
              deaths = T,
              roll = 3,
              roll_fun = 'mean',
              day0 = 1,
              ylog = F,
              pop = T, calendar = T)

plot_day_zero(countries = c('Spain', 'Italy', 'US'),
              districts = c('Cataluña', 'Madrid', 
                            'New York', 
                            'Lombardia', 'Emilia-Romagna'),
              by_district = T,
              deaths = T,
              roll = 3,
              roll_fun = 'mean',
              day0 = 1,
              pop = T)

plot_day_zero(countries = c('Spain', 'Italy', 'US'),
              districts = c('Cataluña', 'Madrid', 
                            'New York', 
                            'Lombardia', 'Emilia-Romagna'),
              by_district = T,
              deaths = T,
              # roll = 7,
              roll_fun = 'mean',
              day0 = 1,
              pop = F)

plot_day_zero(color_var = 'iso', by_district = T,
              deaths = T,
              day0 = 1,
              alpha = 0.6,
              point_alpha = 0,
              calendar = T,
              countries = c('Spain', 'Italy'),
              pop = T)

place_transform <- function(x){ifelse(x == 'Madrid', 'Madrid',
                                      # ifelse(x == 'Cataluña', 'Cataluña',
                                             'Altres CCAA')
  # )
}
place_transform_ita <- function(x){
  ifelse(x == 'Lombardia', 'Lombardia', 
         # ifelse(x == 'Emilia Romagna', 'Emilia Romagna', 
                'Altres regions italianes')
  # )
}
pd <- esp_df %>% mutate(country = 'Espanya') %>%
  mutate(ccaa = place_transform(ccaa)) %>%
  bind_rows(ita %>% mutate(ccaa = place_transform_ita(ccaa),
                           country = 'Itàlia')) %>%
  group_by(country, ccaa, date) %>% 
  summarise(cases = sum(cases),
            uci = sum(uci),
            deaths = sum(deaths),
            cases_non_cum = sum(cases_non_cum),
            deaths_non_cum = sum(deaths_non_cum),
            uci_non_cum = sum(uci_non_cum)) %>%
  left_join(esp_pop %>%
              mutate(ccaa = place_transform(ccaa)) %>%
              bind_rows(ita_pop %>% mutate(ccaa = place_transform_ita(ccaa))) %>%
              group_by(ccaa) %>%
              summarise(pop = sum(pop))) %>%
  mutate(deaths_non_cum_p = deaths_non_cum / pop * 1000000) %>%
  group_by(country, date) %>%
  mutate(p_deaths_non_cum_country = deaths_non_cum / sum(deaths_non_cum) * 100,
         p_deaths_country = deaths / sum(deaths) * 100)
pd$ccaa <- factor(pd$ccaa,
                  levels = c('Madrid',
                             # 'Cataluña',
                             'Altres CCAA',
                             'Lombardia',
                             # 'Emilia Romagna',
                             'Altres regions italianes'))
cols <- c(
  rev(brewer.pal(n = 3, 'Reds'))[1:2],
  rev(brewer.pal(n = 3, 'Blues'))[1:2]
)

label_data <- pd %>%
  filter(country  %in% c('Itàlia', 'Espanya')) %>%
  group_by(country) %>%
  filter(date == max(date))  %>%
  mutate(label = gsub('\nitalianas', '',  gsub(' ', '\n', ccaa))) %>%
  mutate(x = date - 2,
         y = p_deaths_country + 
           ifelse(p_deaths_country > 50, 10, -9))
ggplot(data = pd %>% group_by(country) %>% mutate(start_day = dplyr::first(date[deaths >=50])) %>% filter(date >= start_day),
       aes(x = date,
           y = p_deaths_country,
           color = ccaa,
           group = ccaa)) +
  # geom_bar(stat = 'identity',
  #          position = position_dodge(width = 0.99)) +
  scale_fill_manual(name = '', values = cols) +
  scale_color_manual(name = '', values = cols) +
  geom_line(size = 2,
            aes(color = ccaa)) +
  geom_point(size = 3,
             aes(color = ccaa)) +
  facet_wrap(~country) +
  # xlim(as.Date('2020-03-09'),
  #      Sys.Date()-1) +
  theme_simple() +
  geom_hline(yintercept = 50, lty = 2, alpha = 0.6) +
  # geom_line(lty = 2, alpha = 0.6) +
  labs(x = 'Data',
       y = 'Percentatge de morts',
       title = 'Percentatge de morts: regió més afectada vs resta del pais',
       subtitle = 'A partir del primer día a cada país amb 50 morts acumulades') +
  theme(legend.position = 'top',
        strip.text = element_text(size = 30),
        legend.text = element_text(size = 10))  +
  guides(color = guide_legend(nrow = 2,
                              keywidth = 5)) +
  geom_text(data = label_data,
            aes(x = x-2,
                y = y,
                label = label,
                color = ccaa),
            size = 7,
            show.legend = FALSE)

# Same cahrt as previous, but one shared axis
place_transform <- function(x){ifelse(x == 'Madrid', 'Madrid',
                                      # ifelse(x == 'Cataluña', 'Cataluña',
                                             'Rest of Spain')
  # )
}
place_transform_ita <- function(x){
  ifelse(x == 'Lombardia', 'Lombardia', 
         # ifelse(x == 'Emilia Romagna', 'Emilia Romagna', 
                'Rest of Italy')
  # )
}
pd <- esp_df %>% mutate(country = 'Spain') %>%
  mutate(ccaa = place_transform(ccaa)) %>%
  bind_rows(ita %>% mutate(ccaa = place_transform_ita(ccaa),
                           country = 'Italy')) %>%
  group_by(country, ccaa, date) %>% 
  summarise(cases = sum(cases),
            uci = sum(uci),
            deaths = sum(deaths),
            cases_non_cum = sum(cases_non_cum),
            deaths_non_cum = sum(deaths_non_cum),
            uci_non_cum = sum(uci_non_cum)) %>%
  left_join(esp_pop %>%
              mutate(ccaa = place_transform(ccaa)) %>%
              bind_rows(ita_pop %>% mutate(ccaa = place_transform_ita(ccaa))) %>%
              group_by(ccaa) %>%
              summarise(pop = sum(pop))) %>%
  mutate(deaths_non_cum_p = deaths_non_cum / pop * 1000000) %>%
  group_by(country, date) %>%
  mutate(p_deaths_non_cum_country = deaths_non_cum / sum(deaths_non_cum) * 100,
         p_deaths_country = deaths / sum(deaths) * 100)
pd$ccaa <- factor(pd$ccaa,
                  levels = c('Madrid',
                             # 'Cataluña',
                             'Rest of Spain',
                             'Lombardia',
                             # 'Emilia Romagna',
                             'Rest of Italy'))
cols <- c(
  rev(brewer.pal(n = 3, 'Reds'))[1:2],
  rev(brewer.pal(n = 3, 'Blues'))[1:2]
)

label_data <- pd %>%
  filter(country  %in% c('Italy', 'Spain')) %>%
  group_by(country) %>%
  filter(date == max(date))  %>%
  # mutate(label = gsub('\nitalianas', '',  gsub(' ', '\n', ccaa))) %>%
  mutate(x = date - 2,
         y = p_deaths_country + 
           ifelse(p_deaths_country > 50, 10, -9)) %>%
  ungroup
ggplot(data = pd %>% group_by(country) %>% mutate(start_day = dplyr::first(date[deaths >=30])) %>% filter(date >= start_day),
       aes(x = date,
           y = p_deaths_country,
           color = ccaa,
           group = ccaa)) +
  # geom_bar(stat = 'identity',
  #          position = position_dodge(width = 0.99)) +
  scale_fill_manual(name = '', values = cols) +
  scale_color_manual(name = '', values = cols) +
  geom_line(size = 2,
            aes(color = ccaa)) +
  geom_point(size = 3,
             aes(color = ccaa)) +
  # facet_wrap(~country, scales = 'free_x') +
  # xlim(as.Date('2020-03-09'),
  #      Sys.Date()-1) +
  theme_simple() +
  geom_hline(yintercept = 50, lty = 2, alpha = 0.6) +
  # geom_line(lty = 2, alpha = 0.6) +
  labs(x = 'Date',
       y = 'Percentage of deaths',
       title = 'Percentage of country\'s cumulative COVID-19 deaths by geography',
       subtitle = 'Starting at first day with 50 or more cumulative deaths') +
  theme(legend.position = 'top',
        strip.text = element_text(size = 30),
        legend.text = element_text(size = 10))  +
  guides(color = guide_legend(nrow = 2,
                              keywidth = 5))# +

  # geom_text(data = label_data,
  #           aes(x = x-2,
  #               y = y,
  #               label = label,
  #               color = ccaa),
  #           size = 7,
  #           show.legend = FALSE)

Same chart as above but absolute numbers

# Same cahrt as previous, but one shared axis
place_transform <- function(x){ifelse(x == 'Madrid', 'Madrid',
                                      # ifelse(x == 'Cataluña', 'Cataluña',
                                             'Rest of Spain')
  # )
}
place_transform_ita <- function(x){
  ifelse(x == 'Lombardia', 'Lombardia', 
         # ifelse(x == 'Emilia Romagna', 'Emilia Romagna', 
                'Rest of Italy')
  # )
}
pd <- esp_df %>% mutate(country = 'Spain') %>%
  mutate(ccaa = place_transform(ccaa)) %>%
  bind_rows(ita %>% mutate(ccaa = place_transform_ita(ccaa),
                           country = 'Italy')) %>%
  group_by(country, ccaa, date) %>% 
  summarise(cases = sum(cases),
            uci = sum(uci),
            deaths = sum(deaths),
            cases_non_cum = sum(cases_non_cum),
            deaths_non_cum = sum(deaths_non_cum),
            uci_non_cum = sum(uci_non_cum)) %>%
  left_join(esp_pop %>%
              mutate(ccaa = place_transform(ccaa)) %>%
              bind_rows(ita_pop %>% mutate(ccaa = place_transform_ita(ccaa))) %>%
              group_by(ccaa) %>%
              summarise(pop = sum(pop))) %>%
  mutate(deaths_non_cum_p = deaths_non_cum / pop * 1000000) %>%
  group_by(country, date) %>%
  mutate(p_deaths_non_cum_country = deaths_non_cum / sum(deaths_non_cum) * 100,
         p_deaths_country = deaths / sum(deaths) * 100)
pd$ccaa <- factor(pd$ccaa,
                  levels = rev(c('Madrid',
                             # 'Cataluña',
                             'Rest of Spain',
                             'Lombardia',
                             # 'Emilia Romagna',
                             'Rest of Italy')))
cols <- c(
  rev(brewer.pal(n = 3, 'Reds'))[1:2],
  rev(brewer.pal(n = 3, 'Blues'))[1:2]
)

label_data <- pd %>%
  filter(country  %in% c('Italy', 'Spain')) %>%
  group_by(country) %>%
  filter(date == max(date))  %>%
  # mutate(label = gsub('\nitalianas', '',  gsub(' ', '\n', ccaa))) %>%
  mutate(x = date - 2,
         y = p_deaths_country + 
           ifelse(p_deaths_country > 50, 10, -9)) %>%
  ungroup

# Get moving
ma <- function(x, n = 2){
    
    if(length(x) >= n){
      stats::filter(x, rep(1 / n, n), sides = 1)
    } else {
      new_n <- length(x)
      stats::filter(x, rep(1 / new_n, new_n), sides = 1)
    }
}


ggplot(data = pd %>% group_by(country) %>% 
         mutate(start_day = dplyr::first(date[deaths >=1])) %>% 
         filter(date >= start_day) %>% 
         mutate(days_since = as.numeric(date - start_day)) %>%
         ungroup %>% arrange(date) %>%
         group_by(country, ccaa) %>%
         mutate(rolling_average = ma(deaths_non_cum, n = 3)) %>%
         ungroup,
       aes(x = date,
           y = rolling_average,
           color = ccaa,
           group = ccaa)) +
  # geom_bar(stat = 'identity',
  #          position = position_dodge(width = 0.99)) +
  scale_fill_manual(name = '', values = cols) +
  scale_color_manual(name = '', values = cols) +
  geom_line(size = 2,
            aes(color = ccaa)) +
  geom_point(size = 3,
             aes(color = ccaa)) +
  # scale_y_log10(limits = c(1.5, 1000)) +
  # scale_y_log10() +
  facet_wrap(~country) +
  # xlim(as.Date('2020-03-09'),
  #      Sys.Date()-1) +
  theme_simple() +
  # geom_hline(yintercept = 50, lty = 2, alpha = 0.6) +
  # geom_line(lty = 2, alpha = 0.6) +
  labs(x = 'Date',
       y = 'Deaths (log-scale)',
       title = 'Daily COVID-19 deaths by geography',
       subtitle = '3-day rolling average') +
  theme(legend.position = 'top',
        plot.title = element_text(size = 30),
        plot.subtitle = element_text(size = 24),
        strip.text = element_text(size = 30, hjust = 0.5),
        legend.text = element_text(size = 20))  +
  guides(color = guide_legend(nrow = 2,
                              keywidth = 5))

ggsave('~/Desktop/madlom.png')
roll_curve <- function(data,
                       n = 4,
                       deaths = FALSE,
                       scales = 'fixed',
                       nrow = NULL,
                       ncol = NULL,
                       pop = FALSE){

  # Create the n day rolling avg
  ma <- function(x, n = 5){
    
    if(length(x) >= n){
      stats::filter(x, rep(1 / n, n), sides = 1)
    } else {
      new_n <- length(x)
      stats::filter(x, rep(1 / new_n, new_n), sides = 1)
    }
    
    
  }
  
  pd <- data
  if(deaths){
    pd$var <- pd$deaths_non_cum
  } else {
    pd$var <- pd$cases_non_cum
  }
  
  if('ccaa' %in% names(pd)){
    pd$geo <- pd$ccaa
  } else {
    pd$geo <- pd$iso
  }
  
  if(pop){
    # try to get population
    if(any(pd$geo %in% unique(esp_df$ccaa))){
      right <- esp_pop
      right_var <- 'ccaa'
    } else {
      right <- world_pop
      right_var <- 'iso'
    }
    pd <- pd %>% left_join(right %>% dplyr::select(all_of(right_var), pop),
                           by = c('geo' = right_var)) %>%
      mutate(var = var / pop * 100000)
  }
  
  pd <- pd %>%
    arrange(date) %>%
    group_by(geo) %>%
    mutate(with_lag = ma(var, n = n))
  
  
  ggplot() +
    geom_bar(data = pd,
         aes(x = date,
             y = var),
             stat = 'identity',
         fill = 'grey',
         alpha = 0.8) +
    geom_area(data = pd,
              aes(x = date,
                  y = with_lag),
              color = 'red',
              fill = 'red',
              alpha = 0.6) +
    facet_wrap(~geo, scales = scales, nrow = nrow, ncol = ncol) +
    theme_simple() +
    labs(x = '',
         y = ifelse(deaths, 'Deaths', 'Cases'),
         title = paste0('Daily (non-cumulative) ', ifelse(deaths, 'deaths', 'cases'),
                        ifelse(pop, ' (per 100,000)', '')),
         subtitle = paste0('Data as of ', max(data$date),
                           '.\nRed line = ', n, ' day rolling average. Grey bar = day-specific value.')) +
    theme(axis.text.x = element_text(size = 7,
                                     angle = 90,
                                     hjust = 0.5, vjust = 1)) #+
    # scale_x_date(name ='',
    #              breaks = sort(unique(pd$date)),
    #              labels = format(sort(unique(pd$date)), '%b %d'))
    # scale_y_log10()
}
this_ccaa <- 'Cataluña'
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
roll_curve(plot_data, scales = 'fixed')  + theme(strip.text = element_text(size = 30))

plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
roll_curve(plot_data, deaths = T, scales = 'fixed') + theme(strip.text = element_text(size = 30))

african_countries <-  world_pop$country[world_pop$sub_region %in% c('Sub-Saharan Africa')]

pd <- plot_day_zero(countries = c(african_countries),
                    day0 = 10,
                    max_date = Sys.Date() - 13)
pd

pd <- plot_day_zero(countries = c(african_countries),
                    day0 = 10,
                    max_date = Sys.Date() - 6)
pd

pd <- plot_day_zero(countries = c(african_countries),
                    day0 = 10,
                    max_date = Sys.Date(),
                    ylog = F)
pd

latam_countries <-  world_pop$country[world_pop$sub_region %in% c('Latin America and the Caribbean')]
latam_countries <- latam_countries[!latam_countries %in% c('Guyana')]

pd <- plot_day_zero(countries = c(latam_countries),
                    day0 = 10,
                    max_date = Sys.Date() - 13)
pd

pd <- plot_day_zero(countries = c(latam_countries),
                    day0 = 10,
                    max_date = Sys.Date() - 6)
pd

pd <- plot_day_zero(countries = c(latam_countries),
                    day0 = 10,
                    max_date = Sys.Date())
pd

Latin America and Africa vs Europe

isos <- sort(unique(world_pop$sub_region))
keep_countries <- world_pop$country[world_pop$sub_region %in% c('Latin America and the Caribbean', 'Sub-Saharan Africa') |
                                      world_pop$region %in% 'Europe']
keep_countries <- keep_countries[!keep_countries %in% c('Guyana')]
pd <- df_country %>% ungroup %>%
  filter(country %in% keep_countries) %>%
  dplyr::select(-country) %>%
  left_join(world_pop) %>%
  group_by(iso) %>%
  mutate(day0 = min(date[cases >= 10])) %>%
  ungroup %>%
  mutate(days_since = date - day0) %>%
  filter(days_since >= 0)


cols <- c( 'black')
g <- ggplot(data = pd,
       aes(x = days_since,
           y = cases,
           group = country,
           color = region)) +
  geom_line(data = pd %>% filter(region == 'Europe'),
            alpha = 0.6) +
  scale_y_log10() +
  scale_color_manual(name = '', values = cols) +
  theme_simple() +
  labs(x = 'Days since first day at 10 cases') +
  theme(legend.position = 'top')
g

cols <- c('darkred', 'black')

g + 
    geom_line(data = pd %>% filter(region == 'Africa'),
            # alpha = 1,
            size = 1.5,
            alpha = 0.8) +
    scale_color_manual(name = '', values = cols) +
  scale_y_continuous()

cols <- c( 'darkorange', 'black')
g + 
    geom_line(data = pd %>% filter(sub_region == 'Latin America and the Caribbean'),
            # alpha = 1,
            size = 1.5,
            alpha = 0.8) +
    scale_color_manual(name = '', values = cols) 

cols <- c('darkred', 'darkorange', 'black')
g + 
    geom_line(data = pd %>% filter(sub_region != 'Europe'),
            # alpha = 1,
            size = 1.5,
            alpha = 0.8) +
    scale_color_manual(name = '', values = cols) 

# Assets
pyramid_dir <- '../../data-raw/pyramids/'
pyramid_files <- dir(pyramid_dir)
out_list <- list()
for(i in 1:length(pyramid_files)){
  out_list[[i]] <- read_csv(paste0(pyramid_dir, pyramid_files[i])) %>%
    mutate(region = gsub('.csv', '', pyramid_files[i]))
}
pyramid <- bind_rows(out_list)
make_pyramid <- function(the_region = 'AFRICA-2019'){
  sub_data <- pyramid %>% filter(region == the_region)
  sub_data$Age <- factor(sub_data$Age, levels = sub_data$Age)
  sub_data <- tidyr::gather(sub_data, key, value, M:F)
  ggplot(data = sub_data,
         aes(x = Age,
             y = value,
             fill = key)) +
    geom_bar(stat = 'identity',
             position = position_dodge()) +
    scale_fill_manual(name = '', values = c('darkgrey', 'lightblue')) +
    theme_simple() +
    labs(x = 'Age group',
         y = 'Population') +
    theme(legend.position = 'top')
}
make_pyramid_overlay <- function(){
  sub_data <- pyramid %>% filter(region %in% c('EUROPE-2019',
                                               'AFRICA-2019',
                                               'LATIN AMERICA AND THE CARIBBEAN-2019')) %>%
    mutate(region = gsub('-2019', '', region))
   sub_data$Age <- factor(sub_data$Age, levels = unique(sub_data$Age))
  sub_data <- tidyr::gather(sub_data, key, value, M:F) %>%
    group_by(Age, region) %>%
    summarise(value = sum(value)) %>%
    ungroup %>%
    group_by(region) %>%
    mutate(p = value / sum(value) * 100) %>%
    ungroup
  ggplot(data = sub_data,
         aes(x = Age,
             y = p,
             color = region,
             group = region,
             fill = region)) +
    geom_area(position = position_dodge(),
              alpha = 0.6) +
    scale_fill_manual(name = '',
                      values = c('darkred', 'darkorange', 'black')) +
    scale_color_manual(name = '',
                      values = c('darkred', 'darkorange', 'black')) +
    theme_simple() +
    theme(legend.position = 'top') +
    labs(x = 'Age group', y = 'Percentage')
}

make_pyramid(the_region = 'Spain-2019') + labs(title = 'Spain')
make_pyramid(the_region = 'Italy-2019') + labs(title = 'Italy')

make_pyramid(the_region = 'EUROPE-2019') + labs(title = 'Europe')
make_pyramid(the_region = 'Kenya-2019') + labs(title = 'Kenya')


make_pyramid(the_region = 'Guatemala-2019') + labs(title = 'Guatemala')

make_pyramid(the_region = 'AFRICA-2019') + labs(title = 'Africa')

make_pyramid(the_region = 'LATIN AMERICA AND THE CARIBBEAN-2019') + labs(title = 'Latin America and the Caribbean')

make_pyramid_overlay() + labs(title = 'Population distribution by region')

Pics

plot_day_zero(countries = c('China', 'Italy', 'Spain'),
              districts = c('Hubei', 'Lombardia', 'Cataluña', 'Madrid'),
              by_district = T,
              point_alpha = 0,
              day0 = 5,
              pop = F,
              deaths = T,
              ylog = T,
              calendar = F,
              roll = 5)

Map of portugal, france, spain

# cat_transform <- function(x){ifelse(x == 'Catalunya', 'Cataluña', x)}
cat_transform <- function(x){return(x)}
pd <- por_df %>% mutate(country = 'Portugal') %>%
  bind_rows(esp_df %>% mutate(country = 'Spain')) %>%
  bind_rows(fra_df %>% mutate(country = 'France')) %>%
  bind_rows(ita %>% mutate(country = 'Italy')) %>%
  bind_rows(
    df %>% filter(country == 'Andorra') %>%
      mutate(ccaa = 'Andorra')
  )
keep_date = pd %>% group_by(country) %>% summarise(max_date= max(date)) %>% summarise(x = min(max_date)) %>% .$x
pd <- pd %>%
  mutate(ccaa = cat_transform(ccaa)) %>%
  group_by(ccaa) %>%
  filter(date == keep_date) %>%
  # filter(date == '2020-03-27') %>%
  ungroup %>%
  dplyr::select(date, ccaa, deaths, deaths_non_cum,
                cases, cases_non_cum) %>%
  left_join(regions_pop %>%
              bind_rows(
                world_pop %>% filter(country == 'Andorra') %>% dplyr::mutate(ccaa = country) %>%
                  dplyr::select(-region, -sub_region)
              )) %>%
  mutate(cases_per_million = cases / pop * 1000000,
         deaths_per_million = deaths / pop * 1000000) %>%
  mutate(cases_per_million_non_cum = cases_non_cum / pop * 1000000,
         deaths_per_million_non_cum = deaths_non_cum / pop * 1000000)

map_esp1 <- map_esp
map_esp1@data <- map_esp1@data %>% dplyr::select(ccaa)
map_fra1 <- map_fra
map_fra1@data <- map_fra1@data %>% dplyr::select(ccaa = NAME_1)
map_por1 <- map_por
map_por1@data <- map_por1@data %>% dplyr::select(ccaa = CCDR)
map_ita1 <- map_ita 
map_ita1@data <- map_ita1@data %>% dplyr::select(ccaa)
map_and1 <- map_and
map_and1@data <- map_and1@data %>% dplyr::select(ccaa = NAME_0)


map <- 
  rbind(map_esp1,
        map_por1,
        map_fra1,
        map_ita1,
        map_and1)

# Remove areas not of interest
centroids <- coordinates(map)
centroids <- data.frame(centroids)
names(centroids) <- c('x', 'y')
centroids$ccaa <- map@data$ccaa
centroids <- left_join(centroids, pd)
# map <- map_sp <- map[centroids$y >35 & centroids$x > -10 &
#              centroids$x < 8 & (centroids$y < 43  | map@data$ccaa %in% c('Occitanie', 'Nouvelle-Aquitaine') |
#                                   map@data$ccaa %in% esp_df$ccaa),]
# map_sp <- map <-  map[centroids$x > -10 & centroids$y <47,]
map_sp <- map <-  map[centroids$x > -10 & centroids$y <77,]

# map_sp <- map <-  map[centroids$x > -10,]

# fortify
map <- fortify(map, region = 'ccaa')

# join data
map$ccaa <- map$id
map <- left_join(map, pd)
var <- 'deaths_per_million'
map$var <- as.numeric(unlist(map[,var]))
centroids <- centroids[,c('ccaa', 'x', 'y', var)]
centroids <- centroids %>%
  filter(ccaa %in% map_sp@data$ccaa)

# cols <- rev(RColorBrewer::brewer.pal(n = 9, name = 'Spectral'))
# cols <- c('#A16928','#bd925a','#d6bd8d','#edeac2','#b5c8b8','#79a7ac','#2887a1')
# cols <- c('#009392','#39b185','#9ccb86','#e9e29c','#eeb479','#e88471','#cf597e')
# cols <- c('#008080','#70a494','#b4c8a8','#f6edbd','#edbb8a','#de8a5a','#ca562c')
cols <- rev(colorRampPalette(c('darkred', 'red', 'darkorange', 'orange', 'yellow', 'lightblue'))(10))
g <- ggplot(data = map,
         aes(x = long,
             y = lat,
             group = group)) +
    geom_polygon(aes(fill = var),
                 lwd = 0.3,
                 # color = 'darkgrey',
                 color = NA,
                 size = 0.6) +
      scale_fill_gradientn(name = '',
                           colours = cols) +
  # scale_fill_() +
  ggthemes::theme_map() +
  theme(legend.position = 'bottom',
        plot.title = element_text(size = 16)) +
  guides(fill = guide_colorbar(direction= 'horizontal',
                               barwidth = 50,
                               barheight = 1,
                               label.position = 'bottom')) +
  labs(title = 'Cumulative COVID-19 deaths per million population, western Mediterranean',
       subtitle = paste0('Data as of ', format(max(pd$date), '%B %d, %Y'))) +
  geom_text(data = centroids,
            aes(x = x,
                y = y,
                group = NA,
                label = paste0(ccaa, '\n',
                               round(deaths_per_million, digits = 2))),
            alpha = 0.8,
            size = 3)
g

ggsave('/tmp/map_with_borders.png',
       height = 8, width = 13)

Animation, Portugal, France, Spain, Italy

dir.create('/tmp/animation_map/')
pd <- por_df %>% mutate(country = 'Portugal') %>%
  bind_rows(esp_df %>% mutate(country = 'Spain')) %>%
  bind_rows(fra_df %>% mutate(country = 'France')) %>%
  bind_rows(ita %>% mutate(country = 'Italy')) %>%
  bind_rows(
    df %>% filter(country == 'Andorra') %>%
      mutate(ccaa = 'Andorra')
  )
pd %>% group_by(country) %>% summarise(max_date= max(date))
# A tibble: 5 x 2
  country  max_date  
  <chr>    <date>    
1 Andorra  2020-04-12
2 France   2020-04-11
3 Italy    2020-04-12
4 Portugal 2020-04-12
5 Spain    2020-04-12
unique_dates <- sort(unique(pd$date))
unique_dates <- unique_dates[unique_dates >= '2020-03-01']
popper <- regions_pop %>%
                bind_rows(
                  world_pop %>% filter(country == 'Andorra') %>% dplyr::mutate(ccaa = country) %>%
                    dplyr::select(-region, -sub_region)
                )


map_esp1 <- map_esp
map_esp1@data <- map_esp1@data %>% dplyr::select(ccaa)
map_fra1 <- map_fra
map_fra1@data <- map_fra1@data %>% dplyr::select(ccaa = NAME_1)
map_por1 <- map_por
map_por1@data <- map_por1@data %>% dplyr::select(ccaa = CCDR)
map_ita1 <- map_ita 
map_ita1@data <- map_ita1@data %>% dplyr::select(ccaa)
map_and1 <- map_and
map_and1@data <- map_and1@data %>% dplyr::select(ccaa = NAME_0)


map <- 
  rbind(map_esp1,
        map_por1,
        map_fra1,
        map_ita1,
        map_and1)

# Remove areas not of interest
centroids <- coordinates(map)
centroids <- data.frame(centroids)
names(centroids) <- c('x', 'y')
centroids$ccaa <- map@data$ccaa
# map <- map_sp <- map[centroids$y >35 & centroids$x > -10 &
#              # centroids$x < 8 &
#                (centroids$y < 43  | map@data$ccaa %in% c('Occitanie', 'Nouvelle-Aquitaine') |
#                                   map@data$ccaa %in% esp_df$ccaa),]
# map_sp <- map <-  map[centroids$x > -10 & centroids$y <47,]
map_sp <- map <-  map[centroids$x > -10 & centroids$y <477,]


# fortify
map <- fortify(map, region = 'ccaa')



for(i in 1:length(unique_dates)){
  this_date <- unique_dates[i]
    today_map <- map
    today_centroids <- centroids
    today_pd <- pd

  today_pd <- today_pd %>%
      mutate(ccaa = cat_transform(ccaa)) %>%
    group_by(ccaa) %>%
    # filter(date == max(date)) %>%
    filter(date == this_date) %>%
    ungroup %>%
    dplyr::select(date, ccaa, deaths, deaths_non_cum,
                  cases, cases_non_cum) %>%
    left_join(popper) %>%
    mutate(cases_per_million = cases / pop * 1000000,
           deaths_per_million = deaths / pop * 1000000) %>%
    mutate(cases_per_million_non_cum = cases_non_cum / pop * 1000000,
           deaths_per_million_non_cum = deaths_non_cum / pop * 1000000)
  
  today_centroids <- left_join(today_centroids, today_pd)

  
  # join data
  today_map$ccaa <- today_map$id
  today_map <- left_join(today_map, today_pd)
  var <- 'deaths_per_million'
  today_map$var <- as.numeric(unlist(today_map[,var]))
  today_map$var <- ifelse(is.na(today_map$var), 0, today_map$var)
  today_centroids <- today_centroids[,c('ccaa', 'x', 'y', var)]
  today_centroids <- today_centroids %>%
    filter(ccaa %in% today_map$ccaa)
  today_centroids$var <- today_centroids[,var]
  today_centroids$var <- ifelse(is.na(today_centroids$var), 0, today_centroids$var)

  
  # cols <- rev(RColorBrewer::brewer.pal(n = 9, name = 'Spectral'))
  # cols <- c('#A16928','#bd925a','#d6bd8d','#edeac2','#b5c8b8','#79a7ac','#2887a1')
  # cols <- c('#009392','#39b185','#9ccb86','#e9e29c','#eeb479','#e88471','#cf597e')
  # cols <- c('#008080','#70a494','#b4c8a8','#f6edbd','#edbb8a','#de8a5a','#ca562c')
  cols <- rev(colorRampPalette(c('darkred', 'red', 'darkorange', 'orange', 'yellow', 'white'))(17))
  g <- ggplot(data = today_map,
           aes(x = long,
               y = lat,
               group = group)) +
      geom_polygon(aes(fill = var),
                   lwd = 0.3,
                   # color = 'darkgrey',
                   color = NA,
                   size = 0.6) +
        scale_fill_gradientn(name = '',
                             colours = cols,
                             breaks = seq(0, 1100, 50),
                             limits = c(0, 1100)) +
    # scale_fill_() +
    ggthemes::theme_map() +
    theme(legend.position = 'right',
          plot.title = element_text(size = 24)) +
    guides(fill = guide_colorbar(direction= 'vertical',
                                 barwidth = 1,
                                 barheight = 30,
                                 label.position = 'left')) +
    labs(subtitle = 'Cumulative COVID-19 deaths per million population',
         title = paste0(format(this_date, '%B %d, %Y'))) +
    geom_text(data = today_centroids,
              aes(x = x,
                  y = y,
                  group = NA,
                  label = paste0(ccaa, '\n',
                                 round(var, digits = 2))),
              alpha = 0.8,
              size = 1.5)
  
  ggsave(paste0('/tmp/animation_map/', this_date, '.png'),
         plot = g,
         height = 6, width = 9)
}
# Command line
cd /tmp/animation_map
mogrify -resize 40% *.png
convert -delay 20 -loop 0 *.png result.gif

Deaths overall over time Spain

df_country %>% filter(country == 'Spain') %>% arrange(date) %>% tail
# A tibble: 6 x 10
# Groups:   country [1]
  country date        cases deaths   uci hospitalizations cases_non_cum
  <chr>   <date>      <dbl>  <dbl> <dbl>            <int>         <dbl>
1 Spain   2020-04-07 146690  14555  7130                0          6179
2 Spain   2020-04-08 152446  15238  7371                0          5756
3 Spain   2020-04-09 157022  15843  7482                0          4576
4 Spain   2020-04-10 161852  16353  7535                0          4830
5 Spain   2020-04-11 166019  16972  7554                0          4167
6 Spain   2020-04-12 169496  17489  7777                0          3477
# … with 3 more variables: deaths_non_cum <dbl>, uci_non_cum <dbl>, iso <chr>

Deaths yesterday

pd <- df_country
pd$value <- pd$deaths_non_cum
the_date <- Sys.Date() - 1
pd <- pd %>%
  filter(date == the_date) %>%
  dplyr::select(country, iso, cases, cases_non_cum,
                deaths, value) %>%
  dplyr::arrange(desc(value)) %>%
  left_join(world_pop %>% dplyr::select(-country)) %>%
  mutate(value_per_million = value / pop * 1000000) #%>% 
  # arrange(desc(value_per_million))
pd <- pd[1:10,]
pd$country <- factor(pd$country, levels = pd$country)
ggplot(data = pd,
       aes(x = country,
           y = value)) +
  geom_bar(stat = 'identity',
           fill = 'black') +
  theme_simple() +
  geom_text(aes(label = value),
            nudge_y = -20,
            size = 4,
            color = 'white') +
  labs(title = paste0('Confirmed COVID-19 deaths on ', the_date),
       x = '', y = '')

pd
# A tibble: 10 x 10
# Groups:   country [10]
   country iso    cases cases_non_cum deaths value    pop region sub_region
   <fct>   <chr>  <dbl>         <dbl>  <dbl> <dbl>  <dbl> <chr>  <chr>     
 1 US      USA   555313         28917  22020  1557 3.27e8 Ameri… Northern …
 2 United… GBR    85206          5332  10629   737 6.65e7 Europe Northern …
 3 France  FRA   133670          2943  14412   561 6.70e7 Europe Western E…
 4 Spain   ESP   169496          3477  17489   517 4.67e7 Europe Southern …
 5 Italy   ITA   156363          4092  19899   431 6.04e7 Europe Southern …
 6 Germany DEU   127854          2946   3022   286 8.29e7 Europe Western E…
 7 Belgium BEL    29647          1629   3600   254 1.14e7 Europe Western E…
 8 Iran    IRN    71686          1657   4474   117 8.18e7 Asia   Southern …
 9 Brazil  BRA    22192          1465   1223    99 2.09e8 Ameri… Latin Ame…
10 Turkey  TUR    56956          4789   1198    97 8.23e7 Asia   Western A…
# … with 1 more variable: value_per_million <dbl>

Deaths per million yesterday per CCAA

pd <- esp_df
pd$value <- pd$deaths_non_cum
the_date <- max(pd$date)
pd <- pd %>%
  filter(date == max(date)) %>%
  dplyr::select(ccaa, cases, cases_non_cum,
                deaths, value) %>%
  dplyr::arrange(desc(value)) %>%
  left_join(esp_pop)%>%
  mutate(value_per_million = value / pop * 1000000) #%>% 
  # arrange(desc(value_per_million))
pd <- pd[1:10,]
pd$ccaa <- factor(pd$ccaa, levels = pd$ccaa)
ggplot(data = pd,
       aes(x = ccaa,
           y = value)) +
  geom_bar(stat = 'identity',
           fill = 'black') +
  theme_simple() +
  geom_text(aes(label = value),
            nudge_y = -20,
            size = 4,
            color = 'white')

pd
# A tibble: 10 x 7
   ccaa          cases cases_non_cum deaths value     pop value_per_million
   <fct>         <dbl>         <dbl>  <dbl> <dbl>   <dbl>             <dbl>
 1 Madrid        47146           559   6423   145 6663394             21.8 
 2 Cataluña      34726           699   3538    96 7675217             12.5 
 3 CLM           14054           356   1626    83 2032863             40.8 
 4 CyL           12628           510   1263    42 2399548             17.5 
 5 C. Valenciana  9060           219    876    38 5003769              7.59
 6 País Vasco    11018           246    831    27 2207776             12.2 
 7 Andalucía     10187           181    815    16 8414240              1.90
 8 Aragón         4187           117    464    14 1319291             10.6 
 9 Galicia        7494           158    284    10 2699499              3.70
10 Asturias       1958            66    149     9 1022800              8.80

Deaths yesterday animation

dir.create('/tmp/animation_deaths')
dates <- seq(as.Date('2020-03-17'), (Sys.Date()-1), by = 1)
for(i in 1:length(dates)){
  this_date <- dates[i]
  pd <- df_country
  pd$value <- pd$deaths_non_cum
  pd <- pd %>%
    filter(date == max(this_date)) %>%
    dplyr::select(country, cases, cases_non_cum,
                  deaths, value) %>%
    dplyr::arrange(desc(value))
  pd <- pd[1:10,]
  pd <- pd %>% filter(value > 0)
  pd$country <- gsub(' ', '\n', pd$country)
  pd$country <- factor(pd$country, levels = pd$country)
  pd$color_var <- pd$country == 'Spain'
  if('Spain' %in% pd$country){
    cols <- rev(c('darkred', 'black'))
  } else {
    cols <- 'black'
  }
  g <- ggplot(data = pd,
         aes(x = country,
             y = value)) +
    geom_bar(stat = 'identity',
             aes(fill = color_var),
             alpha = 0.8,
             show.legend = FALSE) +
    theme_simple() +
    geom_text(aes(label = value),
              nudge_y = max(pd$value) * .05,
              size = 5,
              color = 'black') +
    labs(title = 'Daily (non-cumulative) COVID-19 deaths',
         subtitle = format(this_date, '%B %d')) +
    labs(x = 'Country',
         y = 'Deaths') +
    theme(axis.text = element_text(size = 15),
          plot.subtitle = element_text(size = 20)) +
    scale_fill_manual(name ='',
                      values = cols) +
    ylim(0, 900)
  ggsave(filename = paste0('/tmp/animation_deaths/', this_date, '.png'),
         g)
}
# Command line
cd /tmp/animation_deaths
mogrify -resize 50% *.png
convert -delay 50 -loop 0 *.png result.gif

Heatmap

pd <- by_country <-  esp_df %>% mutate(country = 'Spain') %>% 
  bind_rows(ita %>% mutate(country = 'Italy')) %>%
  bind_rows(por_df %>% mutate(country = 'Portugal')) %>%
  bind_rows(fra_df %>% mutate(country = 'France'))
pd$value <- pd$deaths_non_cum
max_date <- pd %>% group_by(country) %>% summarise(d = max(date)) %>% ungroup %>% summarise(d = min(d)) %>% .$d
# pd$value <- ifelse(is.na(pd$value), 0, pd$value)
left <- expand.grid(date = seq(min(pd$date),
                               max(pd$date),
                               by = 1),
                    ccaa = sort(unique(pd$ccaa)))
right <- pd %>% dplyr::select(date, ccaa, value)
pd <- left_join(left, right) %>% mutate(value = ifelse(is.na(value), NA, value))
pd <- left_join(pd, by_country %>% dplyr::distinct(country, ccaa)) %>%
  filter(date <= max_date) %>%
  filter(value > 0)
the_limits <- c(1, 600)
the_breaks <- c(1, seq(100, 600, length = 6)) #seq(0, 600, length = 7)
pd$ccaa <- factor(pd$ccaa, levels = rev(unique(sort(pd$ccaa))))
ggplot(data = pd,
       aes(x = date,
           y = ccaa,
           color = value,
           size = value)) +
  # geom_tile(color = 'white') +
  geom_point(alpha = 0.8) +
  # geom_tile() +
  scale_color_gradientn(colors = rev(colorRampPalette(brewer.pal(n = 8, 'Spectral'))(5)),
                        name = '',
                        limits = the_limits,
                        breaks = the_breaks) +
    # scale_fill_gradientn(colors = rev(colorRampPalette(brewer.pal(n = 8, 'Spectral'))(5)),
    #                     name = '',
    #                     limits = the_limits,
    #                     breaks = the_breaks) +
  scale_size_area(name = '', limits = the_limits, breaks = the_breaks, max_size = 10) +
  
  theme_simple() +
  facet_wrap(~country, scales = 'free_y') +
  theme(strip.text = element_text(size = 20),
        axis.title = element_blank(),
        axis.text = element_text(size = 10),
        axis.text.x = element_text(size = 12)) +
  guides(color = guide_legend(), size = guide_legend()) +
  labs(title = 'Daily (non-cumulative) COVID-19 deaths by sub-state regions',
       caption = paste0('Data as of ', max_date))

ggsave('/tmp/1.png',
       width = 10,
       height = 8)

Heatmap per population

pd <- by_country <-  esp_df %>% mutate(country = 'Spain') %>%  bind_rows(ita %>% mutate(country = 'Italy'))
poppy <- bind_rows(ita_pop, esp_pop)
pd <- pd %>% left_join(poppy)
pd$value <- pd$deaths_non_cum / pd$pop * 1000000
max_date <- pd %>% group_by(country) %>% summarise(d = max(date)) %>% ungroup %>% summarise(d = min(d)) %>% .$d
# pd$value <- ifelse(is.na(pd$value), 0, pd$value)
left <- expand.grid(date = seq(min(pd$date),
                               max(pd$date),
                               by = 1),
                    ccaa = sort(unique(pd$ccaa)))
right <- pd %>% dplyr::select(date, ccaa, value)
pd <- left_join(left, right) %>% mutate(value = ifelse(is.na(value), NA, value))
pd <- left_join(pd, by_country %>% dplyr::distinct(country, ccaa)) %>%
  filter(date <= max_date) %>%
  filter(value > 0)
the_limits <- c(1, 60)
the_breaks <- c(1, seq(10, 60, length = 6)) #seq(0, 600, length = 7)
pd$ccaa <- factor(pd$ccaa, levels = rev(unique(sort(pd$ccaa))))
ggplot(data = pd,
       aes(x = date,
           y = ccaa,
           color = value,
           size = value)) +
  # geom_tile(color = 'white') +
  geom_point(alpha = 0.8) +
  scale_color_gradientn(colors = rev(colorRampPalette(brewer.pal(n = 8, 'Spectral'))(5)),
                        name = '',
                        limits = the_limits,
                        breaks = the_breaks) +
  scale_size_area(name = '', limits = the_limits, breaks = the_breaks, max_size = 10) +
  theme_simple() +
  facet_wrap(~country, scales = 'free') +
  theme(strip.text = element_text(size = 26),
        axis.title = element_blank(),
        axis.text = element_text(size = 16),
        axis.text.x = element_text(size = 12)) +
  guides(color = guide_legend(), size = guide_legend()) +
  labs(title = 'Daily COVID-19 deaths per 1,000,000 population by sub-state regions',
       caption = paste0('Data as of ', max_date))

ggsave('/tmp/2.png',
       width = 10,
       height = 8)

Madrid vs rest of state

place_transform <- function(x){ifelse(x == 'Madrid', 'Madrid',
                                      # ifelse(x == 'Cataluña', 'Cataluña',
                                             'Otras CCAA')
  # )
}
place_transform_ita <- function(x){
  ifelse(x == 'Lombardia', 'Lombardia', 
         # ifelse(x == 'Emilia Romagna', 'Emilia Romagna', 
                'Otras regiones italianas')
  # )
}
pd <- esp_df %>% mutate(country = 'España') %>%
  mutate(ccaa = place_transform(ccaa)) %>%
  bind_rows(ita %>% mutate(ccaa = place_transform_ita(ccaa),
                           country = 'Italia')) %>%
  group_by(country, ccaa, date) %>% 
  summarise(cases = sum(cases),
            uci = sum(uci),
            deaths = sum(deaths),
            cases_non_cum = sum(cases_non_cum),
            deaths_non_cum = sum(deaths_non_cum),
            uci_non_cum = sum(uci_non_cum)) %>%
  left_join(esp_pop %>%
              mutate(ccaa = place_transform(ccaa)) %>%
              bind_rows(ita_pop %>% mutate(ccaa = place_transform_ita(ccaa))) %>%
              group_by(ccaa) %>%
              summarise(pop = sum(pop))) %>%
  mutate(deaths_non_cum_p = deaths_non_cum / pop * 1000000) %>%
  group_by(country, date) %>%
  mutate(p_deaths_non_cum_country = deaths_non_cum / sum(deaths_non_cum) * 100,
         p_deaths_country = deaths / sum(deaths) * 100)
pd$ccaa <- factor(pd$ccaa,
                  levels = c('Madrid',
                             # 'Cataluña',
                             'Otras CCAA',
                             'Lombardia',
                             # 'Emilia Romagna',
                             'Otras regiones italianas'))
cols <- c(
  rev(brewer.pal(n = 3, 'Reds'))[1:2],
  rev(brewer.pal(n = 3, 'Blues'))[1:2]
)
ggplot(data = pd,
       aes(x = date,
           y = deaths_non_cum_p,
           fill = ccaa,
           group = ccaa)) +
  geom_bar(stat = 'identity',
           position = position_dodge(width = 0.99)) +
  scale_fill_manual(name = '', values = cols) +
  scale_color_manual(name = '', values = cols) +
  # geom_line(size = 0.2,
  #           aes(color = ccaa)) +
  xlim(as.Date('2020-03-09'),
       Sys.Date()-1) +
  theme_simple() +
  labs(x = 'Fecha',
       y = 'Muertes diarias por 1.000.000',
       title = 'Muertes por 1.000.000 habitantes') +
  theme(legend.position = 'top') +
  geom_text(aes(label = round(deaths_non_cum_p, digits = 1),
                color = ccaa,
                y = deaths_non_cum_p + 2,
                group = ccaa),
            size = 2.5,
            angle = 90,
            position = position_dodge(width = 0.99))

label_data <- pd %>%
  filter(country  %in% c('Italia', 'España')) %>%
  group_by(country) %>%
  filter(date == max(date))  %>%
  mutate(label = gsub('\nitalianas', '',  gsub(' ', '\n', ccaa))) %>%
  mutate(x = date - 2,
         y = p_deaths_country + 
           ifelse(p_deaths_country > 50, 10, -10))
ggplot(data = pd %>% group_by(country) %>% mutate(start_day = dplyr::first(date[deaths >=50])) %>% filter(date >= start_day),
       aes(x = date,
           y = p_deaths_country,
           color = ccaa,
           group = ccaa)) +
  # geom_bar(stat = 'identity',
  #          position = position_dodge(width = 0.99)) +
  scale_fill_manual(name = '', values = cols) +
  scale_color_manual(name = '', values = cols) +
  geom_line(size = 2,
            aes(color = ccaa)) +
  geom_point(size = 3,
             aes(color = ccaa)) +
  facet_wrap(~country, scales = 'free_x') +
  # xlim(as.Date('2020-03-09'),
  #      Sys.Date()-1) +
  theme_simple() +
  geom_hline(yintercept = 50, lty = 2, alpha = 0.6) +
  # geom_line(lty = 2, alpha = 0.6) +
  labs(x = 'Fecha',
       y = 'Porcentaje de muertes',
       title = 'Porcentaje de muertes del país: región más afectada vs. resto del país',
       subtitle = 'A partir del primer día en cada país con 50 o más muertes') +
  theme(legend.position = 'top',
        strip.text = element_text(size = 30),
        legend.text = element_text(size = 10))  +
  guides(color = guide_legend(nrow = 2,
                              keywidth = 5)) +
  geom_text(data = label_data,
            aes(x = x - 2,
                y = y,
                label = label,
                color = ccaa),
            size = 6,
            show.legend = FALSE)

Italy regions, Spanish regions, Chinese regions (adjusted for population)

# Spanish data
a <- esp_df %>%
  left_join(esp_pop) %>%
  mutate(country = 'Spain')
# Italian data
b <- ita %>%
  left_join(ita_pop) %>%
  mutate(country = 'Italy')
# Chinese data
d <- df %>% filter(country == 'China') %>%
  mutate(cases = cases) %>%
  mutate(ccaa = district) %>%
  mutate(country = 'China') %>%
  left_join(chi_pop)
# join
joined <- bind_rows(a, b, d)
# Get deaths per milllion
joined$deaths_pm <- joined$deaths / joined$pop * 1000000
joined$cases_pm <- joined$cases / joined$pop * 1000000

# Get the days since paradigm
x_deaths <- 5
x_deaths_pm <- 5
x_cases <- 50
x_cases_pm <- 50
joined <- joined %>%
  arrange(date) %>%
  group_by(ccaa) %>%
  mutate(start_deaths = min(date[deaths >= x_deaths]),
         start_cases = min(date[cases >= x_cases]),
         start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]),
         start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>%
  ungroup %>%
  mutate(days_since_start_deaths = date - start_deaths,
         days_since_start_cases = date - start_cases,
         days_since_start_deaths_pm = date - start_deaths_pm,
         days_since_start_cases_pm = date - start_cases_pm) 

# Define plot data
pd <- joined %>% filter(days_since_start_deaths_pm >= 0) %>%
  mutate(country = ifelse(country == 'China',
                          'Hubei (China)',
                          ifelse(country == 'Italy', 'Italia', 'España')))

lombardy_location <- (max(pd_big$days_since_start_deaths_pm[pd_big$ccaa == 'Lombardia']))

# Define label data
label_data <- pd %>% group_by(ccaa) %>% filter(
                                                          (
                                                            (country == 'Hubei (China)' & days_since_start_deaths_pm == 22) |
                                                            (date == max(date) & country == 'España' & deaths_pm > 40 & days_since_start_deaths_pm >= 7) & ccaa != 'CyL' |
                                                              (date == max(date) & country == 'Italia' &
                                                                 ccaa != 'Liguria' & days_since_start_deaths_pm > 15)
                                                          ))
# Get differential label data based on what to be emphasized
bigs <- c('Madrid', 'Lombardia', 'Hubei')
label_data_big <- label_data %>%
  filter(ccaa %in% bigs)
label_data_small <- label_data %>%
  filter(!ccaa %in% bigs)

pd_big <- pd %>%
  filter(ccaa %in% bigs)
pd_small <- pd %>%
  filter(!ccaa %in% bigs)

# cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Set2'))(length(unique(pd$country)))
# cols <- rainbow(3)
cols <- c('darkred', '#FF6633', '#006666')

ggplot(data = pd_big,
       aes(x = days_since_start_deaths_pm,
           y = deaths_pm,
           group = ccaa)) +
  geom_line(aes(color = country),
            alpha = 0.9,
            size = 2) +
  geom_line(data = pd_small,
            aes(x = days_since_start_deaths_pm,
                y = deaths_pm,
                color = country),
            alpha = 0.7,
            size = 1) +
  scale_y_log10() +
  scale_color_manual(name = '',
                     values = c(cols)) +
  theme_simple() +
  theme(legend.position = 'top') +
  labs(x = 'Dias desde "el comienzo del brote"',
       y = 'Muertes por millón de habitantes',
       title = 'Muertes por 1.000.000 habitantes',
       subtitle = paste0('Dia 0 ("comienzo del brote") = primer día a ', x_deaths_pm, ' o más muertes acumuladas por milión de población\nLíneas rojas: CCAA; líneas verde-azules: regiones italianas; línea naranja: Hubei, China'),
       caption = '@joethebrew | www.databrew.cc') +
  geom_text(data = label_data_big,
            aes(x = days_since_start_deaths_pm + 0.6,
                y = (deaths_pm + 50),
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 8,
            alpha = 0.9,
            show.legend = FALSE) +
    geom_text(data = label_data_small,
            aes(x = days_since_start_deaths_pm + 0.6,
                y = deaths_pm  + (log(deaths_pm)/2),
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 5,
            alpha = 0.6,
            show.legend = FALSE) +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 16),
        legend.text = element_text(size = 16),
        plot.title = element_text(size = 25))  +
  xlim(0, lombardy_location + 5)

Italy regions, Spanish regions, Chinese regions (raw numbers, not adjusted for population)

# Spanish data
a <- esp_df %>%
  left_join(esp_pop) %>%
  mutate(country = 'Spain')
# Italian data
b <- ita %>%
  left_join(ita_pop) %>%
  mutate(country = 'Italy')
# Chinese data
d <- df %>% filter(country == 'China') %>%
  mutate(cases = cases) %>%
  mutate(ccaa = district) %>%
  mutate(country = 'China') %>%
  left_join(chi_pop)
# join
joined <- bind_rows(a, b, d)
# Get deaths per milllion
joined$deaths_pm <- joined$deaths / joined$pop * 1000000
joined$cases_pm <- joined$cases / joined$pop * 1000000

# Get the days since paradigm
x_deaths <- 5
x_deaths_pm <- 5
x_cases <- 50
x_cases_pm <- 50
joined <- joined %>%
  arrange(date) %>%
  group_by(ccaa) %>%
  mutate(start_deaths = min(date[deaths >= x_deaths]),
         start_cases = min(date[cases >= x_cases]),
         start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]),
         start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>%
  ungroup %>%
  mutate(days_since_start_deaths = date - start_deaths,
         days_since_start_cases = date - start_cases,
         days_since_start_deaths_pm = date - start_deaths_pm,
         days_since_start_cases_pm = date - start_cases_pm) 

# Define plot data
pd <- joined %>% filter(days_since_start_deaths >= 0) %>%
  mutate(country = ifelse(country == 'China',
                          'China',
                          ifelse(country == 'Italy', 'Italia', 'España')))

# Define label data
label_data <- pd %>% group_by(ccaa) %>% filter(
                                                          (
                                                            (country == 'China' & deaths >10 & days_since_start_deaths == 29) |
                                                            (date == max(date) & country == 'España' & deaths > 90) |
                                                              (date == max(date) & country == 'Italia' &
                                                                 ccaa != 'Liguria' & days_since_start_deaths > 10)
                                                          ))
# Get differential label data based on what to be emphasized
label_data_big <- label_data %>%
  filter(ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
label_data_small <- label_data %>%
  filter(!ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))

pd_big <- pd %>%
  filter(ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
pd_small <- pd %>%
  filter(!ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))

# cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Set2'))(length(unique(pd$country)))
# cols <- rainbow(3)
cols <- c( '#FF6633',  'darkred', '#006666')
ggplot(data = pd_big,
       aes(x = days_since_start_deaths,
           y = deaths,
           group = ccaa)) +
  geom_line(aes(color = country),
            alpha = 0.9,
            size = 2) +
  geom_line(data = pd_small,
            aes(x = days_since_start_deaths,
                y = deaths,
                color = country),
            alpha = 0.7,
            size = 1) +
  scale_y_log10() +
  scale_color_manual(name = '',
                     values = c(cols)) +
  theme_simple() +
  theme(legend.position = 'top') +
  labs(x = 'Dias desde el primer día con 5 o más muertes acumuladas',
       y = 'Muertes',
       title = 'Muertes por COVID-19',
       caption = '@joethebrew | www.databrew.cc') +
  geom_text(data = label_data_big,
            aes(x = days_since_start_deaths + 1.6,
                y = ifelse(ccaa == 'Hubei', (deaths -500),
                           ifelse(ccaa == 'Lombardia',  (deaths + 700),
                                   (deaths + 300))),
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 8,
            alpha = 0.9,
            show.legend = FALSE) +
    geom_text(data = label_data_small,
            aes(x = days_since_start_deaths + 1.6,
                align = 'left',
                y = deaths ,
                label = ccaa,
                # label = gsub(' ', '\n', ccaa),
                color = country),
            size = 5,
            alpha = 0.6,
            show.legend = FALSE) +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 20),
        legend.text = element_text(size = 16),
        plot.title = element_text(size = 30))  +
  xlim(0, 50)

ANIMATION: Italy regions, Spanish regions, Chinese regions (raw numbers, not adjusted for population)

# Spanish data
a <- esp_df %>%
  left_join(esp_pop) %>%
  mutate(country = 'Spain')
# Italian data
b <- ita %>%
  left_join(ita_pop) %>%
  mutate(country = 'Italy')
# Chinese data
d <- df %>% filter(country == 'China') %>%
  mutate(cases = cases) %>%
  mutate(ccaa = district) %>%
  mutate(country = 'China') %>%
  left_join(chi_pop)
# join
joined <- bind_rows(a, b, d)
# Get deaths per milllion
joined$deaths_pm <- joined$deaths / joined$pop * 1000000
joined$cases_pm <- joined$cases / joined$pop * 1000000

# Get the days since paradigm
x_deaths <- 5
x_deaths_pm <- 5
x_cases <- 50
x_cases_pm <- 50
joined <- joined %>%
  arrange(date) %>%
  group_by(ccaa) %>%
  mutate(start_deaths = min(date[deaths >= x_deaths]),
         start_cases = min(date[cases >= x_cases]),
         start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]),
         start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>%
  ungroup %>%
  mutate(days_since_start_deaths = date - start_deaths,
         days_since_start_cases = date - start_cases,
         days_since_start_deaths_pm = date - start_deaths_pm,
         days_since_start_cases_pm = date - start_cases_pm) 

# Define plot data
pd <- joined %>% filter(days_since_start_deaths >= 0) %>%
  mutate(country = ifelse(country == 'China',
                          'China',
                          ifelse(country == 'Italy', 'Italia', 'España')))


add_zero <- function(x, n){
  x <- as.character(x)
  adders <- n - nchar(x)
  adders <- ifelse(adders < 0, 0, adders)
  for (i in 1:length(x)){
    if(!is.na(x[i])){
      x[i] <- paste0(
        paste0(rep('0', adders[i]), collapse = ''),
        x[i],
        collapse = '')  
    } 
  }
  return(x)
}
# # Define label data
# label_data <- pd %>% group_by(ccaa) %>% filter(
#                                                           (
#                                                             (country == 'China' & deaths >10 & days_since_start_deaths == 29) |
#                                                             (date == max(date) & country == 'España' & deaths > 90) |
#                                                               (date == max(date) & country == 'Italia' &
#                                                                  ccaa != 'Liguria' & days_since_start_deaths > 10)
#                                                           ))
# # Get differential label data based on what to be emphasized
# label_data_big <- label_data %>%
#   filter(ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
# label_data_small <- label_data %>%
#   filter(!ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
# 
pd_big <- pd %>%
  filter(ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
pd_small <- pd %>%
  filter(!ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))



# cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Set2'))(length(unique(pd$country)))
# cols <- rainbow(3)
cols <- c( '#FF6633',  'darkred', '#006666')

the_dir <- '/tmp/animation/'
dir.create(the_dir)
the_dates <- sort(unique(c(pd_big$date, pd_small$date)))
for(i in 1:length(the_dates)){
  
  the_date <- the_dates[i]
  pd_big_sub <- pd_big %>% filter(date <= the_date)
  pd_big_current <- pd_big_sub %>% filter(date == the_date)
  pd_small_sub <- pd_small %>% filter(date <= the_date)
  pd_small_current <- pd_small_sub %>% filter(date == the_date)

  label_data_big <-
    pd_big_sub %>%
    filter(ccaa %in% c('Lombardia', 'Madrid', 'Hubei')) %>%
    group_by(ccaa) %>%
    filter(date == max(date)) %>%
    ungroup %>%
    mutate(days_since_start_deaths = ifelse(ccaa == 'Hubei' &
                                              days_since_start_deaths >32,
                                            32,
                                            days_since_start_deaths))
  
  label_data_small <-
    pd_small_sub %>%
    filter(ccaa %in% c('Emilia Romagna',
                       'Cataluña',
                       'CLM',
                       'País Vasco',
                       'Veneto',
                       'Piemonte',
                       'Henan',
                       'Heilongjiang')) %>%
    group_by(ccaa) %>%
    filter(date == max(date))

  n_countries <- length(unique(pd_big_sub$country))
  if(n_countries == 3){
    sub_cols  <- cols
  }
  if(n_countries == 2){
    sub_cols <- cols[c(1,3)]
  }
   if(n_countries == 1){
    sub_cols <- cols[1]
  }
  g <- ggplot(data = pd_big_sub,
       aes(x = days_since_start_deaths,
           y = deaths,
           group = ccaa)) +
  geom_line(aes(color = country),
            alpha = 0.9,
            size = 2) +
  geom_line(data = pd_small_sub,
            aes(x = days_since_start_deaths,
                y = deaths,
                color = country),
            alpha = 0.7,
            size = 1) +
    geom_point(data = pd_big_current,
               aes(x = days_since_start_deaths,
                y = deaths,
                color = country),
               size = 3) +
    geom_point(data = pd_small_current,
               aes(x = days_since_start_deaths,
                y = deaths,
                color = country),
               size = 1, alpha = 0.6) +
    scale_y_log10(limits = c(5, 4500)) +
  scale_color_manual(name = '',
                     values = sub_cols) +
  theme_simple() +
  theme(legend.position = 'top') +
  labs(x = 'Dias desde el primer día con 5 o más muertes acumuladas',
       y = 'Muertes',
       title = format(the_date, '%d %b'),
       subtitle = 'Muertes por COVID-19',
       caption = '@joethebrew | www.databrew.cc') +
  geom_text(data = label_data_big,
            aes(x = days_since_start_deaths + 1,
                y = deaths,
                hjust = 0,
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 8,
            alpha = 0.9,
            show.legend = FALSE) +
    geom_text(data = label_data_small,
            aes(x = days_since_start_deaths + 1.6,
                y = deaths ,
                label = ccaa,
                # label = gsub(' ', '\n', ccaa),
                color = country),
            size = 5,
            alpha = 0.6,
            show.legend = FALSE) +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 20),
        legend.text = element_text(size = 16),
        plot.title = element_text(size = 35),
        plot.subtitle = element_text(size = 24))  +
  xlim(0, 38) 
  message(i)
  ggsave(paste0(the_dir, add_zero(i, 3), '.png'),
         height = 7,
         width = 10.5)
}
# Command line
cd /tmp/animation
mogrify -resize 50% *.png
convert -delay 20 -loop 0 *.png result.gif

ANIMATION: Spain only

# Spanish data
a <- esp_df %>%
  left_join(esp_pop) %>%
  mutate(country = 'Spain')
joined <- a
# Get deaths per milllion
joined$deaths_pm <- joined$deaths / joined$pop * 1000000
joined$cases_pm <- joined$cases / joined$pop * 1000000

# Get the days since paradigm
x_deaths <- 5
x_deaths_pm <- 5
x_cases <- 50
x_cases_pm <- 50
joined <- joined %>%
  arrange(date) %>%
  group_by(ccaa) %>%
  mutate(start_deaths = min(date[deaths >= x_deaths]),
         start_cases = min(date[cases >= x_cases]),
         start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]),
         start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>%
  ungroup %>%
  mutate(days_since_start_deaths = date - start_deaths,
         days_since_start_cases = date - start_cases,
         days_since_start_deaths_pm = date - start_deaths_pm,
         days_since_start_cases_pm = date - start_cases_pm) 

# Define plot data
pd <- joined %>% filter(days_since_start_deaths >= 0) %>%
  mutate(country = ifelse(country == 'China',
                          'China',
                          ifelse(country == 'Italy', 'Italia', 'España')))

bigs <- c('Madrid', 'Cataluña', 'CLM', 'CyL', 'País Vasco', 'La Rioja')
pd_big <- pd %>%
  filter(ccaa %in% bigs)
pd_small <- pd %>%
  filter(!ccaa %in% bigs)



# cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Set2'))(length(unique(pd$country)))
# cols <- rainbow(3)
cols <- colorRampPalette(c('#A16928','#bd925a','#d6bd8d','#edeac2', '#b5c8b8','#79a7ac','#2887a1'))(length(unique(pd$country)))

the_dir <- '/tmp/animation2/'
dir.create(the_dir)
the_dates <- sort(unique(c(pd_big$date, pd_small$date)))
for(i in 1:length(the_dates)){
  
  the_date <- the_dates[i]
  pd_big_sub <- pd_big %>% filter(date <= the_date)
  pd_big_current <- pd_big_sub %>% filter(date == the_date)
  pd_small_sub <- pd_small %>% filter(date <= the_date)
  pd_small_current <- pd_small_sub %>% filter(date == the_date)

  label_data_big <-
    pd_big_sub %>%
    filter(ccaa %in% bigs) %>%
    group_by(ccaa) %>%
    filter(date == max(date)) %>%
    ungroup
  
  label_data_small <-
    pd_small_sub %>%
    group_by(ccaa) %>%
    filter(date == max(date))
# sub_cols <- colorRampPalette(c('#A16928','#bd925a','#d6bd8d','#edeac2', '#b5c8b8','#79a7ac','#2887a1'))(length(unique(pd$ccaa)))
  sub_cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Dark2'))(length(unique(pd$ccaa)))
  # sub_cols <- rainbow((length(unique(pd$ccaa))))
  
  g <- ggplot(data = pd_big_sub,
       aes(x = days_since_start_deaths,
           y = deaths,
           group = ccaa)) +
  geom_line(aes(color = ccaa),
            alpha = 0.9,
            size = 2) +
  geom_line(data = pd_small_sub,
            aes(x = days_since_start_deaths,
                y = deaths,
                color = ccaa),
            alpha = 0.7,
            size = 1) +
    geom_point(data = pd_big_current,
               aes(x = days_since_start_deaths,
                y = deaths,
                color = ccaa),
               size = 3) +
    geom_point(data = pd_small_current,
               aes(x = days_since_start_deaths,
                y = deaths,
                color = ccaa),
               size = 1, alpha = 0.6) +
    geom_point(data = pd,
               aes(x = days_since_start_deaths,
                y = deaths,
                color = ccaa),
               size = 1, alpha = 0.01) +
    scale_y_log10(limits = c(5, max(pd$deaths)*1.2),
                  breaks = c(10, 50, 100, 500, 1000)) +
  scale_color_manual(name = '',
                     values = sub_cols) +
  theme_simple() +
  theme(legend.position = 'top') +
  labs(x = 'Dias desde el primer día con 5 o más muertes acumuladas',
       y = 'Muertes',
       title = format(the_date, '%d %b'),
       subtitle = 'Muertes por COVID-19',
       caption = '@joethebrew | www.databrew.cc')   +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 20),
        legend.text = element_text(size = 16),
        plot.title = element_text(size = 35),
        plot.subtitle = element_text(size = 24))  +
  xlim(0, 20) +
    theme(legend.position = 'none')
  message(i)
  if(nrow(label_data_big) > 0){
    g <- g +
      geom_text(data = label_data_big,
            aes(x = days_since_start_deaths + 0.2,
                y = deaths,
                hjust = 0,
                label = gsub(' ', ' ', ccaa),
                color = ccaa),
            size = 8,
            alpha = 0.9,
            show.legend = FALSE) +
    geom_text(data = label_data_small,
            aes(x = days_since_start_deaths + 0.2,
                y = deaths ,
                label = ccaa,
                # label = gsub(' ', '\n', ccaa),
                color = ccaa),
            size = 5,
            alpha = 0.6,
            show.legend = FALSE)
  }
  
  ggsave(paste0(the_dir, add_zero(i, 3), '.png'),
         height = 7,
         width = 12)
}
# Command line
cd /tmp/animation
mogrify -resize 50% *.png
convert -delay 25 -loop 0 *.png result.gif

Italy regions for Spanish regions

# Spanish data
a <- esp_df %>%
  left_join(esp_pop) %>%
  mutate(country = 'Spain')
# Italian data
b <- ita %>%
  left_join(ita_pop) %>%
  mutate(country = 'Italy')
# join
joined <- bind_rows(a, b)
# Get deaths per milllion
joined$deaths_pm <- joined$deaths / joined$pop * 1000000
joined$cases_pm <- joined$cases / joined$pop * 1000000

# Get the days since paradigm
x_deaths <- 5
x_deaths_pm <- 5
x_cases <- 50
x_cases_pm <- 50
joined <- joined %>%
  arrange(date) %>%
  group_by(ccaa) %>%
  mutate(start_deaths = min(date[deaths >= x_deaths]),
         start_cases = min(date[cases >= x_cases]),
         start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]),
         start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>%
  ungroup %>%
  mutate(days_since_start_deaths = date - start_deaths,
         days_since_start_cases = date - start_cases,
         days_since_start_deaths_pm = date - start_deaths_pm,
         days_since_start_cases_pm = date - start_cases_pm) 

ggplot(data = joined %>% filter(days_since_start_deaths_pm >= 0),
       aes(x = days_since_start_deaths_pm,
           y = deaths_pm,
           group = ccaa)) +
  geom_line(aes(color = country),
            alpha = 0.8,
            size = 2) +
  scale_y_log10() +
  scale_color_manual(name = '',
                     values = c('darkorange', 'purple')) +
  theme_simple() +
  theme(legend.position = 'none') +
  labs(x = 'Days since "start out outbreak"',
       y = 'Deaths per million',
       title = 'Deaths per capita, Italian regions vs. Spanish autonomous communities',
       subtitle = paste0('Day 0 ("start of outbreak") = first day at ', x_deaths_pm, ' or greater cumulative deaths per million'),
       caption = '@joethebrew | www.databrew.cc') +
  geom_text(data = joined %>% group_by(ccaa) %>% filter(date == max(date) & 
                                                          (
                                                            (country == 'Spain' & deaths_pm > 25) |
                                                              (country == 'Italy' & days_since_start_deaths_pm > 10)
                                                          )),
            aes(x = days_since_start_deaths_pm + 0.6,
                y = deaths_pm,
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 6) +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 20)) +
  xlim(0, 23)

ggsave('/tmp/italy_comparison.png',
       height = 6,
       width = 10)


# Separate for Catalonia
pd <- joined %>% filter(days_since_start_deaths_pm >= 0) %>%
         mutate(country = ifelse(ccaa == 'Cataluña',
                                 'Catalonia',
                                 country)) %>%
  mutate(ccaa = ifelse(ccaa == 'Cataluña', 'Catalunya', ccaa))
pdcat <- pd %>% filter(country == 'Catalonia')
label_data <- pd %>% group_by(ccaa) %>% filter(date == max(date) & 
                                                          (
                                                            (country == 'Catalonia') |
                                                            (country == 'Spain' & deaths_pm > 25) |
                                                              (country == 'Italy' & days_since_start_deaths_pm > 10)
                                                          ))
ggplot(data = pd,
       aes(x = days_since_start_deaths_pm,
           y = deaths_pm,
           group = ccaa)) +
  geom_line(aes(color = country),
            alpha = 0.3,
            size = 1.5) +
    geom_line(data = pdcat,
              aes(color = country),
            alpha = 0.8,
            size = 2) +
      geom_point(data = pdcat %>% filter(date == max(date)),
              aes(color = country),
            alpha = 0.8,
            size = 4) +
  scale_y_log10() +
  scale_color_manual(name = '',
                     values = c('darkred', 'darkorange', "purple")) +
  theme_simple() +
  theme(legend.position = 'none') +
  labs(x = 'Dies des del "començament del brot"',
       y = 'Morts per milió',
       title = 'Morts per càpita: Catalunya, comunitats autònomes, regions italianes',
       subtitle = paste0('Dia 0 ("començament del brot") = primer dia a ', x_deaths_pm, ' o més morts acumulades per milió de població'),
       caption = '@joethebrew | www.databrew.cc') +
  geom_text(data = label_data,
            aes(x = days_since_start_deaths_pm +0.2 ,
                y = deaths_pm +3,
                hjust = 0,
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 6,
            alpha = 0.7) +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 20)) +
  xlim(0, 24)

ggsave('/tmp/cat_italy_comparison.png',
       height = 6,
       width = 10)


# Straightforward Lombardy, Madrid, Cat comparison
specials <- c('Lombardia', 'Madrid')
pd <- joined %>% filter(days_since_start_deaths_pm >= 0) %>%
         mutate(country = ifelse(ccaa == 'Cataluña',
                                 'Catalonia',
                                 country)) %>%
  mutate(ccaa = ifelse(ccaa == 'Cataluña', 'Catalunya', ccaa))
pdcat <- pd %>% filter(ccaa %in%  specials)
label_data <- pd %>% group_by(ccaa) %>% filter(date == max(date) & 
                                                          (
                                                            # (country == 'Catalonia') |
                                                            (country == 'Spain' & deaths_pm > 20) |
                                                              (country == 'Italy' & days_since_start_deaths_pm >= 10)
                                                          ))
ggplot(data = pd,
       aes(x = days_since_start_deaths_pm,
           y = deaths_pm,
           group = ccaa)) +
  geom_line(aes(color = country),
            alpha = 0.3,
            size = 1.5) +
    geom_line(data = pdcat,
              aes(color = country),
            alpha = 0.8,
            size = 2) +
  scale_y_log10() +
  scale_color_manual(name = '',
                     values = c('darkred', 'darkorange', "purple")) +
  theme_simple() +
  theme(legend.position = 'none') +
  labs(x = 'Dias desde "el comienzo del brote"',
       y = 'Muertes por millón de habitantes',
       title = 'Muertes acumuladas por 1.000.000 habitantes',
       subtitle = paste0('Dia 0 ("comienzo del brote") = primer día a ', x_deaths_pm, ' o más muertes acumuladas por milión de población'),
       caption = '@joethebrew | www.databrew.cc') +
  geom_text(data = label_data %>% filter(!ccaa %in% specials),
            aes(x = days_since_start_deaths_pm + 0.4,
                y = deaths_pm +3,
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 5,
            alpha = 0.5) +
    geom_text(data = label_data %>% filter(ccaa %in% specials),
            aes(x = days_since_start_deaths_pm ,
                y = deaths_pm +30,
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 8,
            alpha = 0.8) +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 20)) +
  xlim(0, 23)

ggsave('/tmp/mad_lom_italy_comparison.png',
       height = 6,
       width = 10)

Loop for regions of the world

isos <- sort(unique(world_pop$sub_region))
isos <- c('Central Asia', 'Eastern Asia', 'Eastern Europe',
          'Latin America and the Caribbean',
          'Northern Africa', 'Northern America',
          'Nothern Europe',
          'South-eastern Asia',
          'Southern Asia', 'Southern Europe',
          'Sub-Saharan Africa', 'Western Asia', 'Western Europe')
dir.create('/tmp/world')
for(i in 1:length(isos)){
  this_iso <- isos[i]
  message(i, ' ', this_iso)
  countries <- world_pop %>% filter(sub_region == this_iso)
  pd <- df %>%
    filter(!country %in% c('Guyan
                           a', 'Bahamas', 'The Bahamas')) %>%
          group_by(country, iso, date) %>%
          summarise(cases = sum(cases, na.rm = TRUE)) %>%
    ungroup %>%
    group_by(country) %>%
         filter(length(which(cases > 0)) > 1) %>%
    ungroup %>%
         filter(iso %in% countries$iso)
  if(nrow(pd) > 0){
    cols <- colorRampPalette(brewer.pal(n = 8, 'Spectral'))(length(unique(pd$country)))
cols <- sample(cols, length(cols))
    # Plot
n_countries <- (length(unique(pd$country)))
ggplot(data = pd,
       aes(x = date,
           # color = country,
           # fill = country,
           y = cases)) +
  theme_simple() +
  # geom_point() +
  # geom_line() +
  geom_area(fill = 'darkred', alpha = 0.3, color = 'darkred') +
  # scale_color_manual(name = '',
  #                    values = cols) +
  # scale_fill_manual(name = '',
  #                   values = cols) +
  theme(legend.position = 'none',
        axis.text = element_text(size = 6),
        strip.text = element_text(size = ifelse(n_countries > 20, 6,
                                                ifelse(n_countries > 10, 10,
                                                       ifelse(n_countries > 5, 11, 12))) ),
        legend.text = element_text(size = 6)) +
  # scale_y_log10() +
  facet_wrap(~country,
             scales = 'free') +
  labs(x = '',
       y = 'Confirmed cases',
       title = paste0('Confirmed cases of COVID-19 in ', this_iso)) 
  ggsave(paste0('/tmp/world/', this_iso, '.png'),
         width = 12, 
         height = 7)
  }



}

Rolling average new events

plot_data <- df_country %>% filter(country %in% c('Spain', 'France', 'Italy', 'Germany', 'Belgium', 'Norway')) %>% mutate(geo = country)
roll_curve(plot_data, pop = T)

dir.create('/tmp/countries')
roll_curve_country <- function(the_country = 'Spain'){
  plot_data <- df_country %>% filter(country %in% the_country) %>% mutate(geo = country)
  g1 <- roll_curve(plot_data, pop = F)
  g2 <- roll_curve(plot_data, pop = T)
  g3 <- roll_curve(plot_data, pop = F, deaths = T)
  g4 <- roll_curve(plot_data, pop = T, deaths = T)
  ggsave(paste0('/tmp/countries/', the_country, '1.png'), g1)
  ggsave(paste0('/tmp/countries/', the_country, '2.png'), g2)
  ggsave(paste0('/tmp/countries/', the_country, '3.png'), g3)
  ggsave(paste0('/tmp/countries/', the_country, '4.png'), g4)
}


countries <- c('Spain', 'France', 'Italy', 'Germany', 'Belgium', 'Norway', 'US', 'United Kingdom', 'Korea, South',
  'China', 'Japan', 'Switzerland', 'Sweden', 'Denmark', 'Netherlands', 'Iran', 'Canada')
for(i in 1:length(countries)){
  roll_curve_country(the_country = countries[i])
}
Error in ts(x): 'ts' object must have one or more observations
# Cases
plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country)
roll_curve(plot_data)

# Cases adjusted
plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country)
roll_curve(plot_data, pop = T)

# Deaths
plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country)
roll_curve(plot_data, deaths = T)

# Cases adjusted
plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country)
roll_curve(plot_data, pop = T, deaths = T)

plot_data <- esp_df  %>% mutate(geo = ccaa)

roll_curve(plot_data, pop = T, deaths = T)

plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country)

roll_curve(plot_data, deaths = T)

# Latest in Spain
pd <- esp_df %>%
  filter(date == max(date)) %>%
  mutate(p = deaths / sum(deaths) * 100)
text_size <- 12

# deaths
ggplot(data = pd,
       aes(x = ccaa,
           y = deaths)) +
  geom_bar(stat = 'identity',
           fill = 'black') +
  theme_simple() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(x = '',
       y = 'Deaths | Muertes',
       title = 'COVID-19 deaths in Spain',
       subtitle = paste0('Data as of ', max(pd$date)),
       caption = 'github.com/databrew/covid19 | joe@databrew.cc') +
  theme(legend.position = 'top',
        legend.text = element_text(size = text_size * 2),
        axis.title = element_text(size = text_size * 2),
        plot.title = element_text(size = text_size * 2.3),
        axis.text.x = element_text(size = text_size * 1.5)) +
  geom_text(data = pd %>% filter(deaths > 0),
            aes(x = ccaa,
                y = deaths,
                label = paste0(deaths, '\n(',
                               round(p, digits = 1), '%)')),
            size = text_size * 0.3,
            nudge_y = 180) +
  ylim(0, max(pd$deaths * 1.1))

ggsave('/tmp/spain.png')

Muertes relativas por CCAA

# Latest in Spain
pd <- esp_df %>%
  filter(date == max(date)) %>%
  mutate(p = deaths / sum(deaths) * 100)

pd <- pd %>% left_join(esp_pop)
text_size <- 12
pd$value <- pd$deaths / pd$pop * 100000

# deaths
ggplot(data = pd,
       aes(x = ccaa,
           y = value)) +
  geom_bar(stat = 'identity',
           fill = 'black') +
  theme_simple() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(x = '',
       y = 'Deaths per 100,000',
       title = 'COVID-19 deaths per 100.000',
       subtitle = paste0('Data as of ', max(pd$date)),
       caption = 'github.com/databrew/covid19 | joe@databrew.cc') +
  theme(legend.position = 'top',
        legend.text = element_text(size = text_size * 2),
        axis.title = element_text(size = text_size * 2),
        plot.title = element_text(size = text_size * 2.3),
        axis.text.x = element_text(size = text_size * 1.5)) +
  geom_text(data = pd %>% filter(value > 0),
            aes(x = ccaa,
                y = value,
                label = paste0(round(value, digits = 2), '\n(',
                               deaths, '\ndeaths)')),
            size = text_size * 0.3,
            nudge_y = 4.5) +
  ylim(0, max(pd$value) * 1.2)

ggsave('/tmp/spai2.png')

Just yesterday

# Latest in Spain
pd <- esp_df %>%
  filter(date == max(date)) %>%
  mutate(p = deaths_non_cum / sum(deaths_non_cum) * 100)
text_size <- 12

# deaths
ggplot(data = pd,
       aes(x = ccaa,
           y = deaths_non_cum)) +
  geom_bar(stat = 'identity',
           fill = 'black') +
  theme_simple() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(x = '',
       y = 'Deaths',
       title = 'COVID-19 deaths in Spain',
       subtitle = paste0('Data only for ', max(pd$date)),
       caption = 'github.com/databrew/covid19 | joe@databrew.cc') +
  theme(legend.position = 'top',
        legend.text = element_text(size = text_size * 2),
        axis.title = element_text(size = text_size * 2),
        plot.title = element_text(size = text_size * 2.3),
        axis.text.x = element_text(size = text_size * 1.5)) +
  geom_text(data = pd %>% filter(deaths_non_cum > 0),
            aes(x = ccaa,
                y = deaths_non_cum,
                label = paste0(deaths_non_cum, '\n(',
                               round(p, digits = 1), '%)')),
            size = text_size * 0.3,
            nudge_y = 30) +
  ylim(0, max(pd$deaths_non_cum * 1.1))

ggsave('/tmp/spain_non_cum.png')

Muertes relativas por CCAA ayer SOLO

# Latest in Spain
pd <- esp_df %>%
  filter(date == max(date)) %>%
  mutate(p = deaths_non_cum / sum(deaths_non_cum) * 100)

pd <- pd %>% left_join(esp_pop)
text_size <- 12
pd$value <- pd$deaths_non_cum / pd$pop * 100000

# deaths
ggplot(data = pd,
       aes(x = ccaa,
           y = value)) +
  geom_bar(stat = 'identity',
           fill = 'black') +
  theme_simple() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(x = '',
       y = 'Deaths per 100,000',
       title = 'COVID-19 deaths per 100.000',
       subtitle = paste0('Data as of ', max(pd$date)),
       caption = 'github.com/databrew/covid19 | joe@databrew.cc') +
  theme(legend.position = 'top',
        legend.text = element_text(size = text_size * 2),
        axis.title = element_text(size = text_size * 2),
        plot.title = element_text(size = text_size * 2.3),
        axis.text.x = element_text(size = text_size * 1.5)) +
  geom_text(data = pd %>% filter(value > 0),
            aes(x = ccaa,
                y = value,
                label = paste0(round(value, digits = 2), '\n(',
                               deaths_non_cum, '\ndeaths)')),
            size = text_size * 0.3,
            nudge_y = 1) +
  ylim(0, max(pd$value) * 1.3)

ggsave('/tmp/spain_ayer_adj.png')
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(!ccaa %in% c('Melilla'))
roll_curve(plot_data, scales = 'fixed')

ggsave('/tmp/a.png',
       width = 1280 / 150,
       height = 720 / 150)

Loop for everywhere (see desktop)

dir.create('/tmp/ccaas')
ccaas <- sort(unique(esp_df$ccaa))
for(i in 1:length(ccaas)){
  this_ccaa <- ccaas[i]
  plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
  roll_curve(plot_data, scales = 'fixed')  + theme(strip.text = element_text(size = 30))
  ggsave(paste0('/tmp/ccaas/', i, this_ccaa, '_cases.png'),
         width = 1280 / 150,
         height = 720 / 150)
}

ccaas <- sort(unique(esp_df$ccaa))
for(i in 1:length(ccaas)){
  this_ccaa <- ccaas[i]
  plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
  roll_curve(plot_data, scales = 'fixed', pop = TRUE)  + theme(strip.text = element_text(size = 30))
  ggsave(paste0('/tmp/ccaas/', i, this_ccaa, '_cases_pop.png'),
         width = 1280 / 150,
         height = 720 / 150)
}

# Deaths too
for(i in 1:length(ccaas)){
  this_ccaa <- ccaas[i]
  plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
  roll_curve(plot_data, deaths = T, scales = 'fixed') + theme(strip.text = element_text(size = 30))
  ggsave(paste0('/tmp/ccaas/', i, this_ccaa, '_deaths.png'),
         width = 1280 / 150,
         height = 720 / 150)
}

# Deaths too
for(i in 1:length(ccaas)){
  this_ccaa <- ccaas[i]
  plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
  roll_curve(plot_data, deaths = T, scales = 'fixed', pop = TRUE)  + theme(strip.text = element_text(size = 30))
  ggsave(paste0('/tmp/ccaas/', i, this_ccaa, '_deaths_pop.png'),
         width = 1280 / 150,
         height = 720 / 150)
}
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(!ccaa %in% c('Melilla'))
roll_curve(plot_data, scales = 'free_y')

ggsave('/tmp/b.png',
       width = 1280 / 150,
       height = 720 / 150)
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(!ccaa %in% c('Melilla'))
roll_curve(plot_data, deaths = T, scales = 'free_y')

ggsave('/tmp/c.png',
       width = 1280 / 150,
       height = 720 / 150)
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(!ccaa %in% c('Melilla'))
roll_curve(plot_data, deaths = T, scales = 'fixed')

ggsave('/tmp/d.png',
       width = 1280 / 150,
       height = 720 / 150)
plot_data <- df_country %>% filter(country %in% c('Spain', 'Italy', 'Germany', 'France', 'US',
                                                  'China', 'Korea, South', 'Japan', 'Singapore')) %>% mutate(geo = country)
roll_curve(plot_data, scales = 'free_y')

ggsave('/tmp/z.png',
       width = 1280 / 150,
       height = 720 / 150)

World at large

pd <- df_country %>%
  group_by(date) %>%
  summarise(n = sum(cases)) %>%
  filter(date < max(date))
ggplot(data = pd,
       aes(x = date,
           y = n)) +
  geom_point() +
  theme_simple() +
  labs(x = 'Date',
       y = 'Cases',
       title = 'COVID-19 cases')

ggsave('~/Videos/update/a.png',
       width = 1280 / 150,
       height = 720 / 150)
Error in grid.newpage(): could not open file '/home/joebrew/Videos/update/a.png'

China vs world

pd <- df_country %>%
  group_by(date,
           country = ifelse(country == 'China', 'China', 'Other countries')) %>%
  summarise(n = sum(cases))  %>%
  ungroup %>%
  filter(date < max(date))
Error: Column `country` can't be modified because it's a grouping variable
ggplot(data = pd,
       aes(x = date,
           y = n,
           color = country)) +
  geom_line(size = 2) +
  # geom_point() +
  theme_simple() +
  labs(x = 'Date',
       y = 'Cases',
       title = 'COVID-19 cases') +
  scale_color_manual(name = '',
                     values = c('red', 'black')) +
  theme(legend.text = element_text(size = 25),
        legend.position = 'top')
Error in FUN(X[[i]], ...): object 'country' not found

ggsave('~/Videos/update/b.png',
       width = 1280 / 150,
       height = 720 / 150)
Error in grid.newpage(): could not open file '/home/joebrew/Videos/update/b.png'

NOn china only

pd <- df_country %>%
  group_by(date,
           country = ifelse(country == 'China', 'China', 'Other countries')) %>%
  summarise(n = sum(cases)) %>%
  filter(country == 'Other countries')  %>%
  ungroup %>%
  filter(date < max(date))
Error: Column `country` can't be modified because it's a grouping variable
ggplot(data = pd,
       aes(x = date,
           y = n)) +
  geom_line(size = 2) +
  # geom_point() +
  theme_simple() +
  labs(x = 'Date',
       y = 'Cases',
       title = 'COVID-19 cases, outside of China') 

ggsave('~/Videos/update/c.png',
       width = 1280 / 150,
       height = 720 / 150)
Error in grid.newpage(): could not open file '/home/joebrew/Videos/update/c.png'

Case numbers across countries

plot_day_zero(countries = c('France', 'Germany', 'Italy', 'Spain', 'Switzerland', 'Sweden', 'Norway', 'Netherlands'))

# ggsave('~/Videos/update/d.png',
#        width = 1280 / 150,
#        height = 720 / 150)

World at large - deaths

pd <- df_country %>%
  group_by(date) %>%
  summarise(n = sum(deaths)) %>%
  filter(date < max(date))
ggplot(data = pd,
       aes(x = date,
           y = n)) +
  geom_point() +
  theme_simple() +
  labs(x = 'Date',
       y = 'Deaths',
       title = 'COVID-19 deaths')

# ggsave('~/Videos/update/e.png',
#        width = 1280 / 150,
#        height = 720 / 150)

China vs world deaths

pd <- df_country %>%
  group_by(date,
           country = ifelse(country == 'China', 'China', 'Other countries')) %>%
  summarise(n = sum(deaths))  %>%
  ungroup %>%
  filter(date < max(date))
Error: Column `country` can't be modified because it's a grouping variable
ggplot(data = pd,
       aes(x = date,
           y = n,
           color = country)) +
  geom_line(size = 2) +
  # geom_point() +
  theme_simple() +
  labs(x = 'Date',
       y = 'Deaths',
       title = 'COVID-19 deaths') +
  scale_color_manual(name = '',
                     values = c('red', 'black')) +
  theme(legend.text = element_text(size = 25),
        legend.position = 'top')
Error in FUN(X[[i]], ...): object 'country' not found

# ggsave('~/Videos/update/f.png',
#        width = 1280 / 150,
#        height = 720 / 150)

Asian hope

plot_day_zero(countries = c('Korea, South', 'Japan', 'China', 'Singapore'))

# ggsave('~/Videos/update/g.png',
#        width = 1280 / 150,
#        height = 720 / 150)

Since trajectories are very unstable when cases are low, we’ll exclude from our analysis the first few days, and will only count as “outbreak” once a country reaches 150 or more cumulative cases.

# Doubling time
n_cases_start = 150
countries = c('Italy', 'Spain', 'France', 'Germany', 'Italy', 'Switzerland', 'Denmark', 'US', 'United Kingdom', 'Norway')
# countries <- sort(unique(df_country$country))
out_list <- curve_list <-  list()
counter <- 0
for(i in 1:length(countries)){
  message(i)
  this_country <- countries[i]
  sub_data <-df_country %>% filter(country == this_country)
  # Only calculate on countries with n_cases_start or greater cases,
  # starting at the first day at n_cases_start or greater
  ok <- max(sub_data$cases, na.rm = TRUE) >= n_cases_start
  if(ok){
    counter <- counter + 1
    pd <- sub_data %>%
      filter(!is.na(cases)) %>%
      mutate(start_date = min(date[cases >= n_cases_start])) %>%
      mutate(days_since = date - start_date) %>%
      filter(days_since >= 0) %>%
      mutate(days_since = as.numeric(days_since))
    fit <- lm(log(cases) ~ days_since, data = pd) 
    # plot(pd$days_since, log(pd$cases))
    # abline(fit)
    ## Slope
    curve <- fit$coef[2]
    
    # Predict days ahead
    fake <- tibble(days_since = seq(0, max(pd$days_since) + 5, by = 1))
    fake <- left_join(fake, pd %>% dplyr::select(days_since, cases, date))
    fake$predicted <- exp(predict(fit, newdata = fake))
    
    # Doubling time
    dt <- log(2)/fit$coef[2]
    out <- tibble(country = this_country,
                  doubling_time = dt,
                  slope = curve)
    out_list[[counter]] <- out
    curve_list[[counter]] <- fake %>% mutate(country = this_country)
  }
}
done <- bind_rows(out_list)
print(done)
# A tibble: 10 x 3
   country        doubling_time  slope
   <chr>                  <dbl>  <dbl>
 1 Italy                   5.28 0.131 
 2 Spain                   4.06 0.171 
 3 France                  4.39 0.158 
 4 Germany                 4.26 0.163 
 5 Italy                   5.28 0.131 
 6 Switzerland             5.34 0.130 
 7 Denmark                 8.50 0.0816
 8 US                      3.16 0.219 
 9 United Kingdom          3.96 0.175 
10 Norway                  8.01 0.0865
curves <- bind_rows(curve_list)
# Get curves back in exponential form
# curves$curve <- exp(curves$curve)

# Join doubling time to curves
joined <- left_join(curves, done)

# Get rid of Italy future (since it's the "leader")
joined <- joined %>%
  filter(country != 'Italy' |
           date <= (Sys.Date() -1))


# Make long format
long <- joined %>% 
  dplyr::select(date, days_since, country, cases, predicted, doubling_time) %>%
  tidyr::gather(key, value, cases:predicted) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))

The below chart shows the trajectories in terms of number of cases in Europe in red, and the predicted trajectories in black. The black line assumes that the doubling rate will stay constant.

cols <- c('red', 'black')
ggplot(data = long,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  geom_line(data = long %>% filter(key != 'Confirmed cases'),
            size = 1.2, alpha = 0.8) +
  geom_point(data = long %>% filter(key == 'Confirmed cases')) +
  geom_line(data = long %>% filter(key == 'Confirmed cases'),
            size = 0.8) +
  facet_wrap(~paste0(country, '\n',
                     '(doubling time: ', 
                     round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_linetype_manual(name ='',
                        values = c(1,2)) +
  scale_color_manual(name = '',
                     values = cols) +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >150 cumulative cases',
       y = 'Cases',
       title = 'COVID-19 CASES: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >150 cumulative cases)') +
    theme(strip.text = element_text(size = 13),
          plot.title = element_text(size = 15))

Since Italy is “leading the way”, it’s helpful to also compare each country to Italy. Let’s see that.

# Overlay Italy
ol1 <- joined %>% filter(!country %in% 'Italy')
ol2 <- joined %>% filter(country == 'Italy') %>% dplyr::rename(Italy = cases) %>%
  dplyr::select(Italy, days_since)
ol <- left_join(ol1, ol2) %>%
  dplyr::select(days_since, date, country, cases, predicted, Italy,doubling_time)
ol <- tidyr::gather(ol, key, value, cases: Italy) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))

cols <- c('red', 'blue', 'black')
ggplot(data = ol,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  geom_line(data = ol %>% filter(!key %in% c('Confirmed cases', 'Italy')),
            size = 1.2, alpha = 0.8) +
    geom_line(data = ol %>% filter(key %in% c('Italy')),
            size = 0.8, alpha = 0.8) +
  geom_point(data = ol %>% filter(key == 'Confirmed cases')) +
  geom_line(data = ol %>% filter(key == 'Confirmed cases'),
            size = 0.8) +
  facet_wrap(~paste0(country, '\n',
                     '(doubling time: ', 
                     round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_linetype_manual(name ='',
                        values = c(1,6,2)) +
  scale_color_manual(name = '',
                     values = cols) +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >150 cumulative cases',
       y = 'Cases',
       title = 'COVID-19 CASES: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >150 cumulative cases)') +
    theme(strip.text = element_text(size = 13),
          plot.title = element_text(size = 15))

In the above, what’s striking is how many places have trajectories that are worse than Italy’s. Yes, Italy has more cases, but it’s doubling time is less. Either that changes soon, or these other countries will soon have more cases than Italy.

Deaths or cases?

The number of cases is not necessarily the best indicator for the severity of an outbreak of this nature. Why? Because (a) testing rates and protocols are different by place and (b) testing rates are different by time (since health services are changing their approaches as things develop). In other words, when we compare the number of cases by place and time, we are introducing significant bias.

Using deaths to gauge the magnitude of the outbreak is also problematic. Death rates are differential by age, so the number of deaths depends on a country’s population period, or age structure. Also, death rates will be a function of health services, which are not of the same quality every where. And, of course, like cases, we don’t necessarily know about all of the deaths that occur because of COVID-19.

Still, there’s an argument that death rates have less bias than case rates because deaths are easier to identify than cases. Let’s accept that argument, for the time being, and have a look at death rates by country.

# Doubling time
n_deaths_start = 5
countries = c('Italy', 'Spain', 'France', 'Italy', 'Switzerland', 'Denmark', 'US', 'United Kingdom', 'Norway', 'Germany')
# countries <- sort(unique(df_country$country))

make_double_time <- function(data = df_country,
                             the_country = 'Spain',
                             n_deaths_start = 5,
                             time_ahead = 7){
   sub_data <-data %>% filter(country == the_country)
  # Only calculate on countries with n_cases_start or greater cases,
  # starting at the first day at n_cases_start or greater
  ok <- max(sub_data$deaths, na.rm = TRUE) >= n_deaths_start
  if(ok){
    counter <- counter + 1
    pd <- sub_data %>%
      filter(!is.na(deaths)) %>%
      mutate(start_date = min(date[deaths >= n_deaths_start])) %>%
      mutate(days_since = date - start_date) %>%
      filter(days_since >= 0) %>%
      mutate(days_since = as.numeric(days_since)) %>%
      mutate(the_weight = 1/(1 + (as.numeric(max(date) - date))))
    fit <- lm(log(deaths) ~ days_since,
              weights = the_weight,
              data = pd) 
    # fitlo <- loess(deaths ~ days_since, data = pd)
    # plot(pd$days_since, log(pd$cases))
    # abline(fit)
    ## Slope
    # curve <- fit$coef[2]
    
    # Predict days ahead
    day0 <- pd$date[pd$days_since == 0]
    fake <- tibble(days_since = seq(0, max(pd$days_since) + time_ahead, by = 1))
    fake <- fake %>%mutate(date = seq(day0, day0+max(fake$days_since), by = 1))
    fake <- left_join(fake, pd %>% dplyr::select(days_since, deaths, date))
    fake$predicted <- exp(predict(fit, newdata = fake))
    # fake$predictedlo <- predict(fitlo, newdata = fake)
    ci <- exp(predict(fit, newdata = fake, interval = 'prediction'))
    # cilo <- predict(fitlo, newdata = fake, interval = 'prediction')

    fake$lwr <- ci[,'lwr']
    fake$upr <- ci[,'upr']
    # fake$lwrlo <- ci[,'lwr']
    # fake$uprlo <- ci[,'upr']
    # Doubling time
    dt <- log(2)/fit$coef[2]
    fake %>% mutate(country = the_country) %>% mutate(doubling_time = dt)
  }
}

plot_double_time <- function(data, ylog = F){
  the_labs <- labs(x = 'Date',
                   y = 'Deaths',
                   title = paste0('Predicted deaths in ', data$country[1]))
  long <- data %>%
    tidyr::gather(key, value, deaths:predicted) %>%
    mutate(key = Hmisc::capitalize(key))
  g <- ggplot() +
        geom_ribbon(data = data %>% filter(date > max(long$date[!is.na(long$value) & long$key == 'Deaths'])),
                aes(x = date,
                    ymax = upr,
                    ymin = lwr),
                alpha =0.6,
                fill = 'darkorange') +
    geom_line(data = long,
              aes(x = date,
                  y = value,
                  group = key,
                  lty = key)) +
    geom_point(data = long %>% filter(key == 'Deaths'),
               aes(x = date,
                   y = value)) +
    theme_simple() +
    theme(legend.position = 'right',
          legend.title = element_blank()) +
    the_labs
  if(ylog){
    g <- g + scale_y_log10()
  }
  return(g)
}
options(scipen = '999')
data <- make_double_time(n_deaths_start = 150, time_ahead = 7)
data
# A tibble: 36 x 8
   days_since date       country deaths predicted   lwr   upr doubling_time
        <dbl> <date>     <chr>    <dbl>     <dbl> <dbl> <dbl>         <dbl>
 1          0 2020-03-15 Spain      288     1092.  718. 1661.          6.55
 2          1 2020-03-16 Spain      491     1214.  808. 1825.          6.55
 3          2 2020-03-17 Spain      598     1350.  909. 2004.          6.55
 4          3 2020-03-18 Spain      767     1500. 1022. 2203.          6.55
 5          4 2020-03-19 Spain     1002     1668. 1149. 2421.          6.55
 6          5 2020-03-20 Spain     1326     1854. 1291. 2662.          6.55
 7          6 2020-03-21 Spain     1720     2061. 1451. 2928.          6.55
 8          7 2020-03-22 Spain     2182     2291. 1629. 3221.          6.55
 9          8 2020-03-23 Spain     2696     2547. 1829. 3545.          6.55
10          9 2020-03-24 Spain     3434     2831. 2053. 3904.          6.55
# … with 26 more rows
dir.create('/tmp/ccaa_predictions')

plot_double_time(data, ylog = T) +
  labs(subtitle = 'Basic log-linear model weighted at (1 + (1/ days ago)),\nassuming no change in growth trajectory since first day at >150 deaths')

ggsave('/tmp/ccaa_predictions/spain.png')
# All ccaas
ccaas <- sort(unique(esp_df$ccaa))
for(i in 1:length(ccaas)){
  message(i)
  this_ccaa <- ccaas[i]
  sub_data <- esp_df %>% mutate(country = ccaa) 
  try({
    data <- make_double_time(
    data = sub_data,
    the_country = this_ccaa,
    n_deaths_start = 5,
    time_ahead = 7)
  plot_double_time(data, ylog = T) +
  labs(subtitle = 'Basic log-linear model weighted at (1 + (1/ days ago)), assuming no change in growth trajectory since first day at >5 deaths')
  ggsave(paste0('/tmp/ccaa_predictions/',
                this_ccaa, '.png'),
         height = 4.9,
         width = 8.5)
  })

}
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
# all_countries <- sort(unique(df_country$country))
# for(i in 1:length(all_countries)){
#   this_country <- all_countries[i]
#   data <- make_double_time(the_country = this_country, n_deaths_start = 5)
#   if(!is.null(data)){
#     # print(this_country)
#     g <- plot_double_time(data, ylog = F) +
#   labs(subtitle = 'Basic log-linear model assuming no change in growth trajectory since first day at >5 deaths')
#     ggsave(paste0('/tmp/', this_country, '.png'), height = 5, width = 8)
#     print(data)
#   }
# }
out_list <- curve_list <-  list()
counter <- 0
for(i in 1:length(countries)){
  message(i)
  this_country <- countries[i]
  sub_data <-df_country %>% filter(country == this_country)
  # Only calculate on countries with n_cases_start or greater cases,
  # starting at the first day at n_cases_start or greater
  ok <- max(sub_data$deaths, na.rm = TRUE) >= n_deaths_start
  if(ok){
    counter <- counter + 1
    pd <- sub_data %>%
      filter(!is.na(deaths)) %>%
      mutate(start_date = min(date[deaths >= n_deaths_start])) %>%
      mutate(days_since = date - start_date) %>%
      filter(days_since >= 0) %>%
      mutate(days_since = as.numeric(days_since))
    fit <- lm(log(deaths) ~ days_since, data = pd) 
    # plot(pd$days_since, log(pd$cases))
    # abline(fit)
    ## Slope
    # curve <- fit$coef[2]
    
    # Predict days ahead
    fake <- tibble(days_since = seq(0, max(pd$days_since) + 5, by = 1))
    fake <- left_join(fake, pd %>% dplyr::select(days_since, deaths, date))
    fake$predicted <- exp(predict(fit, newdata = fake))
    
    # Doubling time
    dt <- log(2)/fit$coef[2]
    out <- tibble(country = this_country,
                  doubling_time = dt)
    out_list[[counter]] <- out
    curve_list[[counter]] <- fake %>% mutate(country = this_country)
  }
}
Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...): NA/NaN/Inf in 'y'
done <- bind_rows(out_list)
curves <- bind_rows(curve_list)
# Get curves back in exponential form
# curves$curve <- exp(curves$curve)

# Join doubling time to curves
joined <- left_join(curves, done)

# Get rid of Italy future (since it's the "leader")
joined <- joined %>%
  filter(country != 'Italy' |
           date <= (Sys.Date() -1))


# Make long format
long <- joined %>% 
  dplyr::select(date, days_since, country, deaths, predicted, doubling_time) %>%
  tidyr::gather(key, value, deaths:predicted) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))
cols <- c('red', 'black')
sub_data <-  long %>% filter(country != 'US')
ggplot(data = sub_data,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  geom_line(data = sub_data %>% filter(key != 'Deaths'),
            size = 1.2, alpha = 0.8) +
  geom_point(data = sub_data %>% filter(key == 'Deaths')) +
  geom_line(data = sub_data %>% filter(key == 'Deaths'),
            size = 0.8) +
  facet_wrap(~paste0(country, '\n',
                     '(doubling time: ', 
                     round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_linetype_manual(name ='',
                        values = c(1,2)) +
  scale_color_manual(name = '',
                     values = cols) +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >5 cumulative deaths',
       y = 'Deaths',
       title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
    theme(strip.text = element_text(size = 13),
          plot.title = element_text(size = 15))

Let’s again overlay Italy.

# Overlay Italy
ol1 <- joined %>% filter(!country %in% 'Italy')
ol2 <- joined %>% filter(country == 'Italy') %>% dplyr::rename(Italy = deaths) %>%
  dplyr::select(Italy, days_since)
ol <- left_join(ol1, ol2) %>%
  dplyr::select(days_since, date, country, deaths, predicted, Italy,doubling_time)
ol <- tidyr::gather(ol, key, value, deaths: Italy) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))

cols <- c('red', 'blue', 'black')
sub_data <- ol %>% 
  filter(!(key == 'Predicted (based on current doubling time)' &
             country == 'Spain' &
             days_since > 13))
ggplot(data = sub_data,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  geom_line(data = sub_data %>% filter(!key %in% c('Deaths', 'Italy')),
            size = 1.2, alpha = 0.8) +
    geom_line(data = sub_data %>% filter(key %in% c('Italy')),
            size = 0.8, alpha = 0.8) +
  geom_point(data = sub_data %>% filter(key == 'Deaths')) +
  geom_line(data = sub_data %>% filter(key == 'Deaths'),
            size = 0.8) +
  facet_wrap(~paste0(country, '\n',
                     '(doubling time: ', 
                     round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_linetype_manual(name ='',
                        values = c(1,6,2)) +
  scale_color_manual(name = '',
                     values = cols) +
  scale_y_log10() +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >5 deaths',
       y = 'Deaths',
       title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
    theme(strip.text = element_text(size = 13),
          plot.title = element_text(size = 15)) 

Let’s look just at Spain

# Overlay Italy
ol1 <- joined %>% filter(!country %in% 'Italy',
                         country == 'Spain')
ol2 <- joined %>% filter(country == 'Italy') %>% dplyr::rename(Italy = deaths) %>%
  dplyr::select(Italy, days_since)
ol <- left_join(ol1, ol2) %>%
  dplyr::select(days_since, date, country, deaths, predicted, Italy,doubling_time)
ol <- tidyr::gather(ol, key, value, deaths: Italy) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', 
                      ifelse(key == 'Deaths', 'Spain', key)))

cols <- c('blue',  'black', 'red')
ggplot(data = ol,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  geom_line(data = ol %>% filter(!key %in% c('Deaths', 'Italy')),
            size = 1.2, alpha = 0.8) +
    geom_line(data = ol %>% filter(key %in% c('Italy')),
            size = 0.8, alpha = 0.8) +
  # geom_point(data = ol %>% filter(key == 'Deaths')) +
    geom_point(data = ol %>% filter(country == 'Spain',
                                    key == 'Spain'), size = 4, alpha = 0.6) +

  geom_line(data = ol %>% filter(key == 'Deaths'),
            size = 0.8) +
  # facet_wrap(~paste0(country, '\n',
  #                    '(doubling time: ', 
  #                    round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_linetype_manual(name ='',
                        values = c(1,6,1)) +
  scale_color_manual(name = '',
                     values = cols) +
  scale_y_log10() +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >5 deaths',
       y = 'Deaths',
       title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
    theme(strip.text = element_text(size = 13),
          plot.title = element_text(size = 15),
          axis.title = element_text(size = 18))

The importance of lag

Things are changing very rapidly. And measures being taken by these countries will have an impact on the outbreak.

But it’s important to remember that there is a lag between when an intervention takes place and when its effect is notable. Because of the incubation period - the number of days between someone getting infected and becoming sick - what we do today won’t really have an effect until next weekend. And the clinical cases that present today are among people who got infected a week ago.

Disease control measures work. We can see that clearly in the case of Hubei, Wuhan, Iran, Japan. And they will work in Europe too. But because many of these measures were implemented very recently, we won’t likely see a major effect for at least a few more days.

In the mean time, it’s important to practice social distancing. Stay away from others to keep both you and others safe. Listen to Health Authorities. Take this very seriously.

Spain and Italy regions

# Madrid vs Lombardy deaths
n_death_start <- 5
pd <- esp_df %>%
  # filter(ccaa == 'Madrid') %>%
  dplyr::select(date, ccaa, cases, deaths) %>%
  bind_rows(ita %>%
              # filter(ccaa == 'Lombardia') %>%
              dplyr::select(date, ccaa, cases, deaths)) %>%
  arrange(date) %>%
  group_by(ccaa) %>%
  mutate(first_n_death = min(date[deaths >= n_death_start])) %>%
  ungroup %>%
  mutate(days_since_n_deaths = date - first_n_death) %>%
  filter(is.finite(days_since_n_deaths))

pd$country <- pd$ccaa
pd$cases <- pd$cases
countries <- sort(unique(pd$country))
out_list <- curve_list <-  list()
counter <- 0
for(i in 1:length(countries)){
  message(i)
  this_country <- countries[i]
  sub_data <- pd %>% filter(country == this_country)
  # Only calculate on countries with n_cases_start or greater cases,
  # starting at the first day at n_cases_start or greater
  # ok <- max(sub_data$deaths, na.rm = TRUE) >= n_deaths_start
  ok <- length(which(sub_data$deaths >= n_deaths_start))
  if(ok){
    counter <- counter + 1
    sub_pd <- sub_data %>%
      filter(!is.na(deaths)) %>%
      mutate(start_date = min(date[deaths >= n_deaths_start])) %>%
      mutate(days_since = date - start_date) %>%
      filter(days_since >= 0) %>%
      mutate(days_since = as.numeric(days_since))
    fit <- lm(log(deaths) ~ days_since, data = sub_pd) 
    # plot(pd$days_since, log(pd$cases))
    # abline(fit)
    ## Slope
    # curve <- fit$coef[2]
    
    # Predict days ahead
    fake <- tibble(days_since = seq(0, max(sub_pd$days_since) + 5, by = 1))
    fake <- left_join(fake, sub_pd %>% dplyr::select(days_since, deaths, date))
    fake$predicted <- exp(predict(fit, newdata = fake))
    
    # Doubling time
    dt <- log(2)/fit$coef[2]
    out <- tibble(country = this_country,
                  doubling_time = dt)
    out_list[[counter]] <- out
    curve_list[[counter]] <- fake %>% mutate(country = this_country)
  }
}
done <- bind_rows(out_list)
curves <- bind_rows(curve_list)
# Get curves back in exponential form
# curves$curve <- exp(curves$curve)

# Join doubling time to curves
joined <- left_join(curves, done)


# Make long format
long <- joined %>% 
  dplyr::select(date, days_since, country, deaths, predicted, doubling_time) %>%
  tidyr::gather(key, value, deaths:predicted) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))

# Remove those with not enough data to have a doubling time yet
long <- long %>% filter(!is.na(doubling_time))
text_size <- 12

cols <- c('red', 'black')
ggplot(data = long,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  geom_line(data = long %>% filter(key != 'Deaths'),
            size = 1.2, alpha = 0.8) +
  geom_point(data = long %>% filter(key == 'Deaths')) +
  geom_line(data = long %>% filter(key == 'Deaths'),
            size = 0.8) +
  facet_wrap(~paste0(country, '\n',
                     '(doubling time: ', 
                     round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_y_log10() +
  scale_linetype_manual(name ='',
                        values = c(1,2)) +
  scale_color_manual(name = '',
                     values = cols) +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >150 cumulative cases',
       y = 'Deaths',
       title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
    theme(strip.text = element_text(size = text_size * 0.5),
          plot.title = element_text(size = 15))

Let’s overlay Lombardy

# Overlay Italy
ol1 <- joined %>% filter(!country %in% 'Lombardia')
ol2 <- joined %>% filter(country == 'Lombardia') %>% dplyr::rename(Lombardia = deaths) %>%
  dplyr::select(Lombardia, days_since)
ol <- left_join(ol1, ol2) %>%
  dplyr::select(days_since, date, country, deaths, predicted, Lombardia,doubling_time)
ol <- tidyr::gather(ol, key, value, deaths: Lombardia) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))

# Remove those with not enough data to have a doubling time yet
ol <- ol %>% filter(!is.na(doubling_time))

cols <- c('red', 'blue', 'black')
ggplot(data = ol,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  scale_y_log10() +
  geom_line(data = ol %>% filter(!key %in% c('Deaths', 'Italy')),
            size = 1.2, alpha = 0.8) +
    geom_line(data = ol %>% filter(key %in% c('Lombardia')),
            size = 0.5, alpha = 0.8) +
  geom_point(data = ol %>% filter(key == 'Deaths')) +
  geom_line(data = ol %>% filter(key == 'Deaths'),
            size = 0.8) +
  facet_wrap(~paste0(country, '\n',
                     '(doubling time: ', 
                     round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_linetype_manual(name ='',
                        values = c(1,6,2)) +
  scale_color_manual(name = '',
                     values = cols) +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >5 deaths',
       y = 'Deaths',
       title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
    theme(strip.text = element_text(size = text_size * 0.5),
          plot.title = element_text(size = 15))

Show only Spanish regions vs. Lombardy

text_size <- 14

# Overlay Italy
ol1 <- joined %>% filter(!country %in% 'Lombardia')
ol2 <- joined %>% filter(country == 'Lombardia') %>% dplyr::rename(Lombardia = deaths) %>%
  dplyr::select(Lombardia, days_since)
ol <- left_join(ol1, ol2) %>%
  dplyr::select(days_since, date, country, deaths, predicted, Lombardia,doubling_time)
ol <- tidyr::gather(ol, key, value, deaths: Lombardia) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))

# Remove those with not enough data to have a doubling time yet
ol <- ol %>% filter(!is.na(doubling_time))

# Only Spain
ol <- ol %>% filter(country %in% esp_df$ccaa) %>%
  filter(!country %in% 'Aragón')

cols <- c('red', 'blue', 'black')
ggplot(data = ol,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  scale_y_log10() +
  geom_line(data = ol %>% filter(!key %in% c('Deaths', 'Lombardia')),
            size = 1.2, alpha = 0.8) +
    geom_line(data = ol %>% filter(key %in% c('Lombardia')),
            size = 0.5, alpha = 0.8) +
  geom_point(data = ol %>% filter(key == 'Deaths')) +
  geom_line(data = ol %>% filter(key == 'Deaths'),
            size = 0.8) +
  facet_wrap(~paste0(country, '\n',
                     '(doubling time: ', 
                     round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_linetype_manual(name ='',
                        values = c(1,6,2)) +
  scale_color_manual(name = '',
                     values = cols) +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >5 deaths',
       y = 'Deaths',
       title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
    theme(strip.text = element_text(size = text_size * 0.6),
          plot.title = element_text(size = 15))

Same plot but overlayed

Same as above, but overlaid

text_size <-10

# cols <- c('red', 'black')
long <- long %>% filter(country %in% c('Lombardia',
                                       'Emilia Romagna') |
                          country %in% esp_df$ccaa) %>%
  filter(country != 'Aragón')
places <- sort(unique(long$country))

cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 7, 'Spectral'))(length(places))
cols[which(places == 'Madrid')] <- 'red'
cols[which(places == 'Cataluña')] <- 'purple'
cols[which(places == 'Lombardia')] <- 'darkorange'
cols[which(places == 'Emilia Romagna')] <- 'darkgreen'

long$key <- ifelse(long$key != 'Deaths', 'Predicted', long$key)
long$key <- ifelse(long$key == 'Predicted', 'Muertes\nprevistas',
                   'Muertes\nobservadas')


# Keep only Madrid, Lombardy, Emilia Romagna
long <- long %>%
  filter(country %in% c('Madrid',
                        'Lombardia',
                        'Emilia Romagna'))

ggplot(data = long,
       aes(x = days_since,
           y = value,
           lty = key,
           color = country)) +
  geom_point(data = long %>% filter(key == 'Muertes\nobservadas'), size = 2, alpha = 0.8) +
  geom_line(data = long %>% filter(key == 'Muertes\nprevistas'), size = 1, alpha = 0.7) +
  geom_line(data = long %>% filter(key != 'Muertes\nprevistas'), size = 0.8) +
  theme_simple() +
  scale_y_log10() +
  scale_linetype_manual(name ='',
                        values = c(1,4)) +
  scale_color_manual(name = '',
                     values = cols) +
  theme(legend.position = 'top') +
  # labs(x = 'Days since first day at 5 or more cumulative deaths',
  #      y = 'Deaths',
  #      title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
  #      caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
  #      subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
    labs(x = 'Días desde el primer día a 5 o más muertes acumuladas',
       y = 'Muertes (escala logarítmica)',
       title = 'Muertes por COVID-19',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Tasa de crecimiento calculada desde el primer día a 5 o más muertes acumuladas)\n(Muertes "previstas": suponiendo que no hay cambios en la tasa de crecimiento)') +
    theme(strip.text = element_text(size = text_size * 0.75),
          plot.title = element_text(size = text_size * 3),
          legend.text = element_text(size = text_size * 1.5),
          axis.title = element_text(size = text_size * 2),
          axis.text = element_text(size = text_size * 2))

# cols <- c(cols, 'darkorange')
# ggplot(data = ol,
#        aes(x = days_since,
#            y = value,
#            lty = key,
#            color = key)) +
#   scale_y_log10() +
#   geom_line(aes(color = country)) +
#   
#   # geom_line(data = ol %>% filter(!key %in% c('Deaths', 'Italy')),
#   #           size = 1.2, alpha = 0.8) +
#   #   geom_line(data = ol %>% filter(key %in% c('Lombardia')),
#   #           size = 0.5, alpha = 0.8) +
#   # geom_point(data = ol %>% filter(key == 'Deaths')) +
#   # geom_line(data = ol %>% filter(key == 'Deaths'),
#   #           size = 0.8) +
#   theme_simple() +
#   scale_linetype_manual(name ='',
#                         values = c(1,6,2)) +
#   scale_color_manual(name = '',
#                      values = cols) +
#   theme(legend.position = 'top') +
#   labs(x = 'Days since first day at >5 deaths',
#        y = 'Deaths',
#        title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
#        caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
#        subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
#     theme(strip.text = element_text(size = text_size * 1),
#           plot.title = element_text(size = 15))
# Map data preparation

if(!'map.RData' %in% dir()){
  esp1 <- getData(name = 'GADM', country = 'ESP', level = 1)
# Remove canary
esp1 <- esp1[esp1@data$NAME_1 != 'Islas Canarias',]
espf <- fortify(esp1, region = 'NAME_1')

# Standardize names
# Convert names
map_names <- esp1@data$NAME_1
data_names <- sort(unique(esp_df$ccaa))
names_df <- tibble(NAME_1 = c('Andalucía',
 'Aragón',
 'Cantabria',
 'Castilla-La Mancha',
 'Castilla y León',
 'Cataluña',
 'Ceuta y Melilla',
 'Comunidad de Madrid',
 'Comunidad Foral de Navarra',
 'Comunidad Valenciana',
 'Extremadura',
 'Galicia',
 'Islas Baleares',
 'La Rioja',
 'País Vasco',
 'Principado de Asturias',
 'Región de Murcia'),
 ccaa = c('Andalucía',
 'Aragón',
 'Cantabria',
 'CLM',
 'CyL',
 'Cataluña',
 'Melilla',
 'Madrid',
 'Navarra',
 'C. Valenciana',
 'Extremadura',
 'Galicia',
 'Baleares',
 'La Rioja',
 'País Vasco',
 'Asturias',
 'Murcia'))


espf <- left_join(espf %>% dplyr::rename(NAME_1 = id), names_df)
centroids <- data.frame(coordinates(esp1))
names(centroids) <- c('long', 'lat')
centroids$NAME_1 <- esp1$NAME_1
centroids <- centroids %>% left_join(names_df)

# Get random sampling points

  random_list <- list()
  for(i in 1:nrow(esp1)){
    message(i)
    shp <- esp1[i,]
    # bb <- bbox(shp)
    this_ccaa <- esp1@data$NAME_1[i]
    # xs <- runif(n = 500, min = bb[1,1], max = bb[1,2])
    # ys <- runif(n = 500, min = bb[2,1], max = bb[2,2])
    # random_points <- expand.grid(long = xs, lat = ys) %>%
    #   mutate(x = long,
    #          y = lat)
    # coordinates(random_points) <- ~x+y
    # proj4string(random_points) <- proj4string(shp)
    # get ccaa
    message('getting locations of randomly generated points')
    # polys <- over(random_points,polygons(shp))
    # polys <- as.numeric(polys)
    random_points <- spsample(shp, n = 20000, type = 'random')
    random_points <- data.frame(random_points)
    random_points$NAME_1 <-  this_ccaa
    random_points <- left_join(random_points, names_df) %>% dplyr::select(-NAME_1)
    random_list[[i]] <- random_points
  }
  random_points <- bind_rows(random_list)
  random_points <- random_points %>% mutate(long = x,
                                            lat = y)

save(espf,
     esp1,
     names_df,
     centroids,
     random_points,
     file = 'map.RData')
} else {
  load('map.RData')
}

# Define a function for adding zerio
add_zero <- 
  function (x, n) 
  {
    x <- as.character(x)
    adders <- n - nchar(x)
    adders <- ifelse(adders < 0, 0, adders)
    for (i in 1:length(x)) {
      if (!is.na(x[i])) {
        x[i] <- paste0(paste0(rep("0", adders[i]), collapse = ""), 
                       x[i], collapse = "")
      }
    }
    return(x)
  }
remake_world_map <- FALSE
options(scipen = '999')
if(remake_world_map){
  # World map animation
  world <- map_data('world')
  # world <- ne_countries(scale = "medium", returnclass = "sf")
  
  # Get plotting data
  pd <- df_country %>%
    dplyr::select(date, lng, lat, n = cases)
  dates <- sort(unique(pd$date))
  n_days <- length(dates)
  # # Define vectors for projection
  # vec_lon <- seq(30, -20, length = n_days)
  # vec_lat <- seq(25, 15, length = n_days)
  
  dir.create('animation')
  for(i in 1:n_days){
    message(i, ' of ', n_days)
    this_date <- dates[i]
    # this_lon <- vec_lon[i]
    # this_lat <- vec_lat[i]
    # the_crs <-
    #   paste0("+proj=laea +lat_0=", this_lat,
    #          " +lon_0=",
    #          this_lon,
    #          " +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs ")
    sub_data <- pd %>%
      filter(date == this_date)
    # coordinates(sub_data) <- ~lng+lat
    # proj4string(sub_data) <- proj4string(esp1)
    # # sub_data <- spTransform(sub_data,
    # #                         the_crs)
    # coordy <- coordinates(sub_data)
    # sub_data@data$long <- coordy[,1]
    # sub_data@data$lat <- coordy[,2]
  
    g <- ggplot() +
      geom_polygon(data = world,
                   aes(x = long,
                       y = lat,
                       group = group),
                   fill = 'black',
                   color = 'white',
                   size = 0.1) +
      theme_map() +
          geom_point(data = sub_data %>% filter(n > 0) %>% mutate(Deaths = n),
                 aes(x = lng,
                     y = lat,
                     size = Deaths),
                 color = 'red',
                 alpha = 0.6) +
      geom_point(data = tibble(x = c(0,0), y = c(0,0), Deaths = c(1, 100000)),
                 aes(x = x,
                     y = y,
                     size = Deaths),
                 color = 'red',
                 alpha =0.001) +
      scale_size_area(name = '', breaks = c(100, 1000, 10000, 100000),
                      max_size = 25
                      ) +
    # scale_size_area(name = '', limits = c(1, 10), breaks = c(0, 10, 30, 50, 70, 100, 200, 500)) +
      labs(title = this_date) +
      theme(plot.title = element_text(size = 30),
            legend.text = element_text(size = 15),
            legend.position = 'left')
  
    plot_number <- add_zero(i, 3)
    ggsave(filename = paste0('animation/', plot_number, '.png'),
           plot = g,
           width = 9.5,
           height = 5.1)
  }
  setwd('animation')
  system('convert -delay 30x100 -loop 0 *.png result.gif')
  setwd('..')

}

Maps of Spain

make_map <- function(var = 'deaths',
                     data = NULL,
                     pop = FALSE,
                     pop_factor = 100000,
                     points = FALSE,
                     line_color = 'white',
                     add_names = T,
                     add_values = T,
                     text_size = 2.7){
  
  if(is.null(data)){
    data <- esp_df %>%  mutate(ccaa = cat_transform(ccaa))

  }

  left <- espf %>%   mutate(ccaa = cat_transform(ccaa)) 
  right <- data[,c('ccaa', paste0(var, '_non_cum'))]
  

  names(right)[ncol(right)] <- 'var'
  right <- right %>% group_by(ccaa) %>% summarise(var = sum(var, na.rm = T))
  
  if(pop){
    right <- left_join(right, esp_pop)
    right$var <- right$var / right$pop * pop_factor
  }
  map <- left_join(left, right)
  
  if(points){
    the_points <- centroids %>%
      left_join(right)
    g <- ggplot() +
      geom_polygon(data = map,
         aes(x = long,
             y = lat,
             group = group),
         fill = 'black',
         color = line_color,
         lwd = 0.4, alpha = 0.8) +
      geom_point(data = the_points,
                 aes(x = long,
                     y = lat,
                     size = var),
                 color = 'red',
                 alpha = 0.7) +
      scale_size_area(name = '', max_size = 20)
  } else {
    # cols <- c('#008080','#70a494','#b4c8a8','#f6edbd','#edbb8a','#de8a5a','#ca562c')
    cols <- RColorBrewer::brewer.pal(n = 8, name = 'Blues')
    g <- ggplot(data = map,
         aes(x = long,
             y = lat,
             group = group)) +
    geom_polygon(aes(fill = var),
                 lwd = 0.3,
                 color = line_color) +
      scale_fill_gradientn(name = '',
                           colours = cols)
    # scale_fill_viridis(name = '' ,option = 'magma',
    #                    direction = -1) 
  }
  
  # Add names?
  if(add_names){
    centy <- centroids %>% left_join(right)
    if(add_values){
      centy$label <- paste0(centy$ccaa, '\n(', round(centy$var, digits = 2), ')')
    } else {
      centy$label <- centy$ccaa
    }

    g <- g +
      geom_text(data = centy,
                aes(x = long,
                    y = lat,
                    label = label,
                    group = ccaa),
                alpha = 0.7,
                size = text_size)
  }
  
  g +
    theme_map() +
    labs(subtitle = paste0('Data as of ', max(data$date))) +
    theme(legend.position = 'right')
  
}

make_dot_map <- function(var = 'deaths',
                     date = NULL,
                     pop = FALSE,
                     pop_factor = 100,
                     point_factor = 1,
                     points = FALSE,
                     point_color = 'darkred',
                     point_size = 0.6,
                     point_alpha = 0.5){
  
  
  if(is.null(date)){
    the_date <- max(esp_df$date)
  } else {
    the_date <- date
  }
    right <- esp_df[esp_df$date == the_date,c('ccaa', var)]
   names(right)[ncol(right)] <- 'var'
  if(pop){
    right <- left_join(right, esp_pop)
    right$var <- right$var / right$pop * pop_factor
  }
  map_data <- esp1@data %>%
    left_join(names_df) %>%
      left_join(right)
  map_data$var <- map_data$var / point_factor
  out_list <- list()
  for(i in 1:nrow(map_data)){
    sub_data <- map_data[i,]
    this_value = round(sub_data$var)

    if(this_value >= 1){
      this_ccaa = sub_data$ccaa
      # get some points
      sub_points <- random_points %>% filter(ccaa == this_ccaa)
      sampled_points <- sub_points %>% dplyr::sample_n(this_value)
      out_list[[i]] <- sampled_points
    }
  }
  the_points <- bind_rows(out_list)
  
  g <- ggplot() +
    geom_polygon(data = espf,
         aes(x = long,
             y = lat,
             group = group),
         fill = 'white',
         color = 'black',
         lwd = 0.4, alpha = 0.8) +
    geom_point(data = the_points,
               aes(x = long,
                   y = lat),
               color = point_color,
               size = point_size,
               alpha = point_alpha)
  g +
    theme_map() +
    labs(subtitle = paste0('Data as of ', max(esp_df$date)))
  
}

Deaths

Absolute number of deaths: points

make_map(var = 'deaths',
       points = T) +
  labs(title = 'Number of deaths',
       caption = '@joethebrew')

Absolute number of deaths: choropleth

make_map(var = 'deaths',
         line_color = 'darkgrey',
       points = F) +
  labs(title = 'Number of deaths',
       caption = '@joethebrew')

Number of deaths adjusted by population: points

make_map(var = 'deaths', pop = TRUE, points = T) +
  labs(title = 'Number of deaths per 100,000',
       caption = '@joethebrew')

Number of deaths adjusted by population: polygons

make_map(var = 'deaths', pop = TRUE, points = F, line_color = 'darkgrey') +
  labs(title = 'Number of deaths per 100,000',
       caption = '@joethebrew')

Number of deaths: 1 dot per death

make_dot_map(var = 'deaths', point_size = 0.05) +
  labs(title = 'COVID-19 deaths: 1 point = 1 death\nImportant: points are random within each CCAA; do not reflect exact location',
       caption = '@joethebrew')

Cases

Absolute number of cases: points

make_map(var = 'cases',
       points = T) +
  labs(title = 'Number of confirmed cases',
       caption = '@joethebrew')

Absolute number of cases: choropleth

make_map(var = 'cases',
         line_color = 'darkgrey',
       points = F) +
  labs(title = 'Number of confirmed cases',
       caption = '@joethebrew')

Number of cases adjusted by population: points

make_map(var = 'cases', pop = TRUE, points = T) +
  labs(title = 'Number of confirmed cases per 100,000',
       caption = '@joethebrew')

Number of cases adjusted by population: polygons

make_map(var = 'cases', pop = TRUE, points = F,
         line_color = 'darkgrey') +
  labs(title = 'Number of confirmed cases per 100,000',
       caption = '@joethebrew')

Number of cases: points

make_dot_map(var = 'cases',
             point_size = 0.05, point_alpha = 0.5, point_factor = 10) +
  labs(title = 'COVID-19 cases: 1 point = 10 cases\nImportant: points are random within each CCAA; do not reflect exact location',
       caption = '@joethebrew')